R Markdown

library(readr)
library(Hotelling)
library(car)
library(stats)
library(biotools)


# Load the dataset
 
BreastCancer_data <- read_csv("~/Downloads/wdbc.data")
names(BreastCancer_data) <- c('ID', 'Diagnosis', 'mean_radius', 'mean_texture', 'mean_perimeter', 'mean_area', 'mean_smoothness', 'mean_compactness', 'mean_concavity', 'mean_concave_points', 'mean_symmetry', 'mean_fractal_dimension', 'se_radius', 'se_texture', 'se_perimeter', 'se_area', 'se_smoothness', 'se_compactness', 'se_concavity', 'se_concave_points', 'se_symmetry', 'se_fractal_dimension', 'worst_radius', 'worst_texture', 'worst_perimeter', 'worst_area', 'worst_smoothness', 'worst_compactness', 'worst_concavity', 'worst_concave_points', 'worst_symmetry', 'worst_fractal_dimension')
BreastCancer_data
## # A tibble: 568 × 32
##          ID Diagnosis mean_rad…¹ mean_…² mean_…³ mean_…⁴ mean_…⁵ mean_…⁶ mean_…⁷
##       <dbl> <chr>          <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1   842517 M               20.6    17.8   133.    1326   0.0847  0.0786  0.0869
##  2 84300903 M               19.7    21.2   130     1203   0.110   0.160   0.197 
##  3 84348301 M               11.4    20.4    77.6    386.  0.142   0.284   0.241 
##  4 84358402 M               20.3    14.3   135.    1297   0.100   0.133   0.198 
##  5   843786 M               12.4    15.7    82.6    477.  0.128   0.17    0.158 
##  6   844359 M               18.2    20.0   120.    1040   0.0946  0.109   0.113 
##  7 84458202 M               13.7    20.8    90.2    578.  0.119   0.164   0.0937
##  8   844981 M               13      21.8    87.5    520.  0.127   0.193   0.186 
##  9 84501001 M               12.5    24.0    84.0    476.  0.119   0.240   0.227 
## 10   845636 M               16.0    23.2   103.     798.  0.0821  0.0667  0.0330
## # … with 558 more rows, 23 more variables: mean_concave_points <dbl>,
## #   mean_symmetry <dbl>, mean_fractal_dimension <dbl>, se_radius <dbl>,
## #   se_texture <dbl>, se_perimeter <dbl>, se_area <dbl>, se_smoothness <dbl>,
## #   se_compactness <dbl>, se_concavity <dbl>, se_concave_points <dbl>,
## #   se_symmetry <dbl>, se_fractal_dimension <dbl>, worst_radius <dbl>,
## #   worst_texture <dbl>, worst_perimeter <dbl>, worst_area <dbl>,
## #   worst_smoothness <dbl>, worst_compactness <dbl>, worst_concavity <dbl>, …
BC_data <- BreastCancer_data
BC_data
## # A tibble: 568 × 32
##          ID Diagnosis mean_rad…¹ mean_…² mean_…³ mean_…⁴ mean_…⁵ mean_…⁶ mean_…⁷
##       <dbl> <chr>          <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1   842517 M               20.6    17.8   133.    1326   0.0847  0.0786  0.0869
##  2 84300903 M               19.7    21.2   130     1203   0.110   0.160   0.197 
##  3 84348301 M               11.4    20.4    77.6    386.  0.142   0.284   0.241 
##  4 84358402 M               20.3    14.3   135.    1297   0.100   0.133   0.198 
##  5   843786 M               12.4    15.7    82.6    477.  0.128   0.17    0.158 
##  6   844359 M               18.2    20.0   120.    1040   0.0946  0.109   0.113 
##  7 84458202 M               13.7    20.8    90.2    578.  0.119   0.164   0.0937
##  8   844981 M               13      21.8    87.5    520.  0.127   0.193   0.186 
##  9 84501001 M               12.5    24.0    84.0    476.  0.119   0.240   0.227 
## 10   845636 M               16.0    23.2   103.     798.  0.0821  0.0667  0.0330
## # … with 558 more rows, 23 more variables: mean_concave_points <dbl>,
## #   mean_symmetry <dbl>, mean_fractal_dimension <dbl>, se_radius <dbl>,
## #   se_texture <dbl>, se_perimeter <dbl>, se_area <dbl>, se_smoothness <dbl>,
## #   se_compactness <dbl>, se_concavity <dbl>, se_concave_points <dbl>,
## #   se_symmetry <dbl>, se_fractal_dimension <dbl>, worst_radius <dbl>,
## #   worst_texture <dbl>, worst_perimeter <dbl>, worst_area <dbl>,
## #   worst_smoothness <dbl>, worst_compactness <dbl>, worst_concavity <dbl>, …
attach(BC_data)
str(BC_data)
## spc_tbl_ [568 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ID                     : num [1:568] 842517 84300903 84348301 84358402 843786 ...
##  $ Diagnosis              : chr [1:568] "M" "M" "M" "M" ...
##  $ mean_radius            : num [1:568] 20.6 19.7 11.4 20.3 12.4 ...
##  $ mean_texture           : num [1:568] 17.8 21.2 20.4 14.3 15.7 ...
##  $ mean_perimeter         : num [1:568] 132.9 130 77.6 135.1 82.6 ...
##  $ mean_area              : num [1:568] 1326 1203 386 1297 477 ...
##  $ mean_smoothness        : num [1:568] 0.0847 0.1096 0.1425 0.1003 0.1278 ...
##  $ mean_compactness       : num [1:568] 0.0786 0.1599 0.2839 0.1328 0.17 ...
##  $ mean_concavity         : num [1:568] 0.0869 0.1974 0.2414 0.198 0.1578 ...
##  $ mean_concave_points    : num [1:568] 0.0702 0.1279 0.1052 0.1043 0.0809 ...
##  $ mean_symmetry          : num [1:568] 0.181 0.207 0.26 0.181 0.209 ...
##  $ mean_fractal_dimension : num [1:568] 0.0567 0.06 0.0974 0.0588 0.0761 ...
##  $ se_radius              : num [1:568] 0.543 0.746 0.496 0.757 0.335 ...
##  $ se_texture             : num [1:568] 0.734 0.787 1.156 0.781 0.89 ...
##  $ se_perimeter           : num [1:568] 3.4 4.58 3.44 5.44 2.22 ...
##  $ se_area                : num [1:568] 74.1 94 27.2 94.4 27.2 ...
##  $ se_smoothness          : num [1:568] 0.00522 0.00615 0.00911 0.01149 0.00751 ...
##  $ se_compactness         : num [1:568] 0.0131 0.0401 0.0746 0.0246 0.0335 ...
##  $ se_concavity           : num [1:568] 0.0186 0.0383 0.0566 0.0569 0.0367 ...
##  $ se_concave_points      : num [1:568] 0.0134 0.0206 0.0187 0.0188 0.0114 ...
##  $ se_symmetry            : num [1:568] 0.0139 0.0225 0.0596 0.0176 0.0216 ...
##  $ se_fractal_dimension   : num [1:568] 0.00353 0.00457 0.00921 0.00511 0.00508 ...
##  $ worst_radius           : num [1:568] 25 23.6 14.9 22.5 15.5 ...
##  $ worst_texture          : num [1:568] 23.4 25.5 26.5 16.7 23.8 ...
##  $ worst_perimeter        : num [1:568] 158.8 152.5 98.9 152.2 103.4 ...
##  $ worst_area             : num [1:568] 1956 1709 568 1575 742 ...
##  $ worst_smoothness       : num [1:568] 0.124 0.144 0.21 0.137 0.179 ...
##  $ worst_compactness      : num [1:568] 0.187 0.424 0.866 0.205 0.525 ...
##  $ worst_concavity        : num [1:568] 0.242 0.45 0.687 0.4 0.535 ...
##  $ worst_concave_points   : num [1:568] 0.186 0.243 0.258 0.163 0.174 ...
##  $ worst_symmetry         : num [1:568] 0.275 0.361 0.664 0.236 0.399 ...
##  $ worst_fractal_dimension: num [1:568] 0.089 0.0876 0.173 0.0768 0.1244 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `842302` = col_double(),
##   ..   M = col_character(),
##   ..   `17.99` = col_double(),
##   ..   `10.38` = col_double(),
##   ..   `122.8` = col_double(),
##   ..   `1001` = col_double(),
##   ..   `0.1184` = col_double(),
##   ..   `0.2776` = col_double(),
##   ..   `0.3001` = col_double(),
##   ..   `0.1471` = col_double(),
##   ..   `0.2419` = col_double(),
##   ..   `0.07871` = col_double(),
##   ..   `1.095` = col_double(),
##   ..   `0.9053` = col_double(),
##   ..   `8.589` = col_double(),
##   ..   `153.4` = col_double(),
##   ..   `0.006399` = col_double(),
##   ..   `0.04904` = col_double(),
##   ..   `0.05373` = col_double(),
##   ..   `0.01587` = col_double(),
##   ..   `0.03003` = col_double(),
##   ..   `0.006193` = col_double(),
##   ..   `25.38` = col_double(),
##   ..   `17.33` = col_double(),
##   ..   `184.6` = col_double(),
##   ..   `2019` = col_double(),
##   ..   `0.1622` = col_double(),
##   ..   `0.6656` = col_double(),
##   ..   `0.7119` = col_double(),
##   ..   `0.2654` = col_double(),
##   ..   `0.4601` = col_double(),
##   ..   `0.1189` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
BC_data$Diagnosis <- as.factor(BC_data$Diagnosis)

BC_data_x <- BC_data[, 3:32]
BC_data_x
## # A tibble: 568 × 30
##    mean_radius mean_te…¹ mean_…² mean_…³ mean_…⁴ mean_…⁵ mean_…⁶ mean_…⁷ mean_…⁸
##          <dbl>     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1        20.6      17.8   133.    1326   0.0847  0.0786  0.0869  0.0702   0.181
##  2        19.7      21.2   130     1203   0.110   0.160   0.197   0.128    0.207
##  3        11.4      20.4    77.6    386.  0.142   0.284   0.241   0.105    0.260
##  4        20.3      14.3   135.    1297   0.100   0.133   0.198   0.104    0.181
##  5        12.4      15.7    82.6    477.  0.128   0.17    0.158   0.0809   0.209
##  6        18.2      20.0   120.    1040   0.0946  0.109   0.113   0.074    0.179
##  7        13.7      20.8    90.2    578.  0.119   0.164   0.0937  0.0598   0.220
##  8        13        21.8    87.5    520.  0.127   0.193   0.186   0.0935   0.235
##  9        12.5      24.0    84.0    476.  0.119   0.240   0.227   0.0854   0.203
## 10        16.0      23.2   103.     798.  0.0821  0.0667  0.0330  0.0332   0.153
## # … with 558 more rows, 21 more variables: mean_fractal_dimension <dbl>,
## #   se_radius <dbl>, se_texture <dbl>, se_perimeter <dbl>, se_area <dbl>,
## #   se_smoothness <dbl>, se_compactness <dbl>, se_concavity <dbl>,
## #   se_concave_points <dbl>, se_symmetry <dbl>, se_fractal_dimension <dbl>,
## #   worst_radius <dbl>, worst_texture <dbl>, worst_perimeter <dbl>,
## #   worst_area <dbl>, worst_smoothness <dbl>, worst_compactness <dbl>,
## #   worst_concavity <dbl>, worst_concave_points <dbl>, worst_symmetry <dbl>, …
BC_data_cm <- colMeans(BC_data_x)
BC_data_S <- cov(BC_data_x)
BC_data_d <- apply(BC_data_x, MARGIN = 1, function(BC_data_x)t(BC_data_x - BC_data_cm) %*% solve(BC_data_S) %*% (BC_data_x - BC_data_cm))
BC_data_cm
##             mean_radius            mean_texture          mean_perimeter 
##            1.412049e+01            1.930533e+01            9.191475e+01 
##               mean_area         mean_smoothness        mean_compactness 
##            6.542798e+02            9.632148e-02            1.040360e-01 
##          mean_concavity     mean_concave_points           mean_symmetry 
##            8.842731e-02            4.874629e-02            1.810549e-01 
##  mean_fractal_dimension               se_radius              se_texture 
##            6.276960e-02            4.039576e-01            1.217402e+00 
##            se_perimeter                 se_area           se_smoothness 
##            2.855984e+00            4.013802e+01            7.042109e-03 
##          se_compactness            se_concavity       se_concave_points 
##            2.543666e-02            3.185527e-02            1.178896e-02 
##             se_symmetry    se_fractal_dimension            worst_radius 
##            2.052560e-02            3.790682e-03            1.625315e+01 
##           worst_texture         worst_perimeter              worst_area 
##            2.569192e+01            1.071251e+02            8.785789e+02 
##        worst_smoothness       worst_compactness         worst_concavity 
##            1.323161e-01            2.535409e-01            2.714143e-01 
##    worst_concave_points          worst_symmetry worst_fractal_dimension 
##            1.143407e-01            2.897762e-01            8.388428e-02
BC_data_S
##                           mean_radius  mean_texture mean_perimeter
## mean_radius              1.241446e+01  4.977041e+00   8.538744e+01
## mean_texture             4.977041e+00  1.839128e+01   3.498582e+01
## mean_perimeter           8.538744e+01  3.498582e+01   5.898024e+02
## mean_area                1.224281e+03  4.922992e+02   8.431797e+03
## mean_smoothness          8.318960e-03 -1.070339e-03   6.976046e-02
## mean_compactness         9.318078e-02  5.658896e-02   7.065370e-01
## mean_concavity           1.890209e-01  1.072014e-01   1.378170e+00
## mean_concave_points      1.120034e-01  5.060880e-02   7.984274e-01
## mean_symmetry            1.388384e-02  9.389823e-03   1.188281e-01
## mean_fractal_dimension  -7.875976e-03 -2.074770e-03  -4.580476e-02
## se_radius                6.601130e-01  3.404765e-01   4.632046e+00
## se_texture              -1.873961e-01  9.134076e-01  -1.148069e+00
## se_perimeter             4.772965e+00  2.543855e+00   3.380135e+01
## se_area                  1.174046e+02  5.271029e+01   8.187864e+02
## se_smoothness           -2.355109e-03  7.545494e-05  -1.477929e-02
## se_compactness           1.286264e-02  1.518357e-02   1.080202e-01
## se_concavity             2.054624e-02  1.898047e-02   1.664018e-01
## se_concave_points        8.166187e-03  4.420177e-03   6.094047e-02
## se_symmetry             -3.109091e-03  4.744283e-04  -1.694215e-02
## se_fractal_dimension    -4.146920e-04  6.586149e-04  -4.863904e-04
## worst_radius             1.648070e+01  7.485610e+00   1.135626e+02
## worst_texture            6.501412e+00  2.402127e+01   4.579262e+01
## worst_perimeter          1.139623e+02  5.305461e+01   7.895129e+02
## worst_area               1.883788e+03  8.606878e+02   1.298711e+04
## worst_smoothness         9.438014e-03  8.094079e-03   8.204788e-02
## worst_compactness        2.268464e-01  1.948166e-01   1.723145e+00
## worst_concavity          3.850688e-01  2.775081e-01   2.839595e+00
## worst_concave_points     1.716677e-01  8.601180e-02   1.225807e+00
## worst_symmetry           3.464846e-02  3.066769e-02   2.755397e-01
## worst_fractal_dimension  2.119824e-04  9.826686e-03   2.052601e-02
##                             mean_area mean_smoothness mean_compactness
## mean_radius              1.224281e+03    8.318960e-03     9.318078e-02
## mean_texture             4.922992e+02   -1.070339e-03     5.658896e-02
## mean_perimeter           8.431797e+03    6.976046e-02     7.065370e-01
## mean_area                1.238503e+05    8.642461e-01     9.175323e+00
## mean_smoothness          8.642461e-01    1.972903e-04     4.836908e-04
## mean_compactness         9.175323e+00    4.836908e-04     2.741070e-03
## mean_concavity           1.914966e+01    5.780471e-04     3.660011e-03
## mean_concave_points      1.120175e+01    2.988769e-04     1.676183e-03
## mean_symmetry            1.425029e+00    2.130687e-04     8.554644e-04
## mean_fractal_dimension  -7.143974e-01    5.755138e-05     2.063140e-04
## se_radius                7.119520e+01    1.150983e-03     7.087509e-03
## se_texture              -1.269935e+01    5.437959e-04     1.443878e-03
## se_perimeter             5.144223e+02    8.211561e-03     5.696347e-02
## se_area                  1.276197e+04    1.536177e-01     1.062030e+00
## se_smoothness           -1.761392e-01    1.408523e-05     2.168893e-05
## se_compactness           1.327680e+00    7.955419e-05     6.926883e-04
## se_concavity             2.196490e+00    1.047898e-04     9.044430e-04
## se_concave_points        8.073949e-01    3.293454e-05     2.084158e-04
## se_symmetry             -2.170701e-01    2.301361e-05     9.767365e-05
## se_fractal_dimension    -2.001763e-02    1.047957e-05     7.028679e-05
## worst_radius             1.634838e+03    1.415769e-02     1.340950e-01
## worst_texture            6.280260e+02    3.448643e-03     8.324134e-02
## worst_perimeter          1.131450e+04    1.100670e-01     1.025586e+00
## worst_area               1.918354e+05    1.613890e+00     1.500198e+01
## worst_smoothness         9.760221e-01    2.578987e-04     6.740252e-04
## worst_compactness        2.140320e+01    1.031305e-03     7.081106e-03
## worst_concavity          3.743191e+01    1.261252e-03     8.875002e-03
## worst_concave_points     1.663904e+01    4.600052e-04     2.790102e-03
## worst_symmetry           3.027352e+00    3.370779e-04     1.617994e-03
## worst_fractal_dimension  2.423656e-03    1.256969e-04     6.461268e-04
##                         mean_concavity mean_concave_points mean_symmetry
## mean_radius               1.890209e-01        1.120034e-01  1.388384e-02
## mean_texture              1.072014e-01        5.060880e-02  9.389823e-03
## mean_perimeter            1.378170e+00        7.984274e-01  1.188281e-01
## mean_area                 1.914966e+01        1.120175e+01  1.425029e+00
## mean_smoothness           5.780471e-04        2.988769e-04  2.130687e-04
## mean_compactness          3.660011e-03        1.676183e-03  8.554644e-04
## mean_concavity            6.287574e-03        2.818564e-03  1.073442e-03
## mean_concave_points       2.818564e-03        1.491285e-03  4.823144e-04
## mean_symmetry             1.073442e-03        4.823144e-04  7.463505e-04
## mean_fractal_dimension    1.839527e-04        4.304948e-05  9.134733e-05
## se_radius                 1.373727e-02        7.404969e-03  2.236431e-03
## se_texture                3.474098e-03        5.146358e-04  1.973398e-03
## se_perimeter              1.044943e-01        5.485873e-02  1.681490e-02
## se_area                   2.200860e+00        1.201038e+00  2.676738e-01
## se_smoothness             2.387351e-05        3.338825e-06  1.551484e-05
## se_compactness            9.498062e-04        3.373040e-04  2.048459e-04
## se_concavity              1.658273e-03        5.115180e-04  2.816906e-04
## se_concave_points         3.351633e-04        1.469513e-04  6.620805e-05
## se_symmetry               1.139719e-04        2.899277e-05  1.009432e-04
## se_fractal_dimension      9.404932e-05        2.607809e-05  2.385292e-05
## worst_radius              2.622474e-01        1.544149e-01  2.367461e-02
## worst_texture             1.503095e-01        7.139010e-02  1.619689e-02
## worst_perimeter           1.928925e+00        1.104569e+00  1.939526e-01
## worst_area                3.031152e+01        1.772095e+01  2.648439e+00
## worst_smoothness          8.072486e-04        3.966553e-04  2.643405e-04
## worst_compactness         9.332580e-03        4.010709e-03  2.000497e-03
## worst_concavity           1.456572e-02        6.025305e-03  2.437763e-03
## worst_concave_points      4.465145e-03        2.299381e-03  7.605824e-04
## worst_symmetry            1.959591e-03        8.741219e-04  1.170789e-03
## worst_fractal_dimension   7.296756e-04        2.527607e-04  2.137065e-04
##                         mean_fractal_dimension     se_radius    se_texture
## mean_radius                      -7.875976e-03  6.601130e-01 -1.873961e-01
## mean_texture                     -2.074770e-03  3.404765e-01  9.134076e-01
## mean_perimeter                   -4.580476e-02  4.632046e+00 -1.148069e+00
## mean_area                        -7.143974e-01  7.119520e+01 -1.269935e+01
## mean_smoothness                   5.755138e-05  1.150983e-03  5.437959e-04
## mean_compactness                  2.063140e-04  7.087509e-03  1.443878e-03
## mean_concavity                    1.839527e-04  1.373727e-02  3.474098e-03
## mean_concave_points               4.304948e-05  7.404969e-03  5.146358e-04
## mean_symmetry                     9.134733e-05  2.236431e-03  1.973398e-03
## mean_fractal_dimension            4.948929e-05 -1.917584e-05  6.493176e-04
## se_radius                        -1.917584e-05  7.619724e-02  3.305964e-02
## se_texture                        6.493176e-04  3.305964e-02  3.046812e-01
## se_perimeter                      4.086837e-04  5.394188e-01  2.525033e-01
## se_area                          -3.219083e-02  1.189094e+01  2.866957e+00
## se_smoothness                     8.554267e-06  1.380045e-04  6.587740e-04
## se_compactness                    7.024720e-05  1.742678e-03  2.305972e-03
## se_concavity                      9.474186e-05  2.760451e-03  3.264879e-03
## se_concave_points                 1.477579e-05  8.749705e-04  7.874699e-04
## se_symmetry                       1.990465e-05  5.408787e-04  1.885577e-03
## se_fractal_dimension              1.281108e-05  1.644951e-04  4.103517e-04
## worst_radius                     -8.928486e-03  9.490021e-01 -2.933045e-01
## worst_texture                    -1.994071e-03  3.427805e-01  1.384605e+00
## worst_perimeter                  -5.093144e-02  6.623880e+00 -1.856009e+00
## worst_area                       -9.656728e-01  1.174838e+02 -2.554974e+01
## worst_smoothness                  8.070420e-05  8.638162e-04 -9.129688e-04
## worst_compactness                 4.989919e-04  1.204749e-02 -7.810953e-03
## worst_concavity                   4.985273e-04  2.152133e-02 -7.707941e-03
## worst_concave_points              7.727167e-05  9.513715e-03 -4.262841e-03
## worst_symmetry                    1.413789e-04  1.417677e-03 -4.289981e-03
## worst_fractal_dimension           9.703487e-05  2.060610e-04 -4.364398e-04
##                         se_perimeter       se_area se_smoothness se_compactness
## mean_radius             4.772965e+00  1.174046e+02 -2.355109e-03   1.286264e-02
## mean_texture            2.543855e+00  5.271029e+01  7.545494e-05   1.518357e-02
## mean_perimeter          3.380135e+01  8.187864e+02 -1.477929e-02   1.080202e-01
## mean_area               5.144223e+02  1.276197e+04 -1.761392e-01   1.327680e+00
## mean_smoothness         8.211561e-03  1.536177e-01  1.408523e-05   7.955419e-05
## mean_compactness        5.696347e-02  1.062030e+00  2.168893e-05   6.926883e-04
## mean_concavity          1.044943e-01  2.200860e+00  2.387351e-05   9.498062e-04
## mean_concave_points     5.485873e-02  1.201038e+00  3.338825e-06   3.373040e-04
## mean_symmetry           1.681490e-02  2.676738e-01  1.551484e-05   2.048459e-04
## mean_fractal_dimension  4.086837e-04 -3.219083e-02  8.554267e-06   7.024720e-05
## se_radius               5.394188e-01  1.189094e+01  1.380045e-04   1.742678e-03
## se_texture              2.525033e-01  2.866957e+00  6.587740e-04   2.305972e-03
## se_perimeter            4.037240e+00  8.525089e+01  9.252348e-04   1.486244e-02
## se_area                 8.525089e+01  2.050496e+03  1.041094e-02   2.277507e-01
## se_smoothness           9.252348e-04  1.041094e-02  9.030286e-06   1.816268e-05
## se_compactness          1.486244e-02  2.277507e-01  1.816268e-05   3.202877e-04
## se_concavity            2.194114e-02  3.682852e-01  2.441974e-05   4.330025e-04
## se_concave_points       6.910675e-03  1.160842e-01  6.099966e-06   8.219549e-05
## se_symmetry             4.365829e-03  4.862480e-02  1.029205e-05   5.813967e-05
## se_fractal_dimension    1.284213e-03  1.484377e-02  3.404143e-06   3.803125e-05
## worst_radius            6.733017e+00  1.649966e+02 -3.343330e-03   1.736165e-02
## worst_texture           2.578768e+00  5.670466e+01 -1.391226e-03   1.611529e-02
## worst_perimeter         4.829089e+01  1.150203e+03 -2.187521e-02   1.538251e-01
## worst_area              8.311382e+02  2.082563e+04 -3.107221e-01   1.989014e+00
## worst_smoothness        5.712771e-03  1.245089e-01  2.162942e-05   9.190075e-05
## worst_compactness       1.048013e-01  1.948788e+00 -2.582572e-05   1.898788e-03
## worst_concavity         1.725605e-01  3.573414e+00 -3.608367e-05   2.373811e-03
## worst_concave_points    7.235189e-02  1.581957e+00 -1.999679e-05   5.635329e-04
## worst_symmetry          1.205598e-02  1.750265e-01 -1.978199e-05   3.013360e-04
## worst_fractal_dimension 2.771839e-03  7.453815e-03  5.552556e-06   1.900289e-04
##                         se_concavity se_concave_points   se_symmetry
## mean_radius             2.054624e-02      8.166187e-03 -3.109091e-03
## mean_texture            1.898047e-02      4.420177e-03  4.744283e-04
## mean_perimeter          1.664018e-01      6.094047e-02 -1.694215e-02
## mean_area               2.196490e+00      8.073949e-01 -2.170701e-01
## mean_smoothness         1.047898e-04      3.293454e-05  2.301361e-05
## mean_compactness        9.044430e-04      2.084158e-04  9.767365e-05
## mean_concavity          1.658273e-03      3.351633e-04  1.139719e-04
## mean_concave_points     5.115180e-04      1.469513e-04  2.899277e-05
## mean_symmetry           2.816906e-04      6.620805e-05  1.009432e-04
## mean_fractal_dimension  9.474186e-05      1.477579e-05  1.990465e-05
## se_radius               2.760451e-03      8.749705e-04  5.408787e-04
## se_texture              3.264879e-03      7.874699e-04  1.885577e-03
## se_perimeter            2.194114e-02      6.910675e-03  4.365829e-03
## se_area                 3.682852e-01      1.160842e-01  4.862480e-02
## se_smoothness           2.441974e-05      6.099966e-06  1.029205e-05
## se_compactness          4.330025e-04      8.219549e-05  5.813967e-05
## se_concavity            9.119629e-04      1.438500e-04  7.698160e-05
## se_concave_points       1.438500e-04      3.811024e-05  1.591348e-05
## se_symmetry             7.698160e-05      1.591348e-05  6.829438e-05
## se_fractal_dimension    5.810841e-05      9.976860e-06  8.047034e-06
## worst_radius            2.696517e-02      1.063349e-02 -5.280606e-03
## worst_texture           1.895267e-02      3.355469e-03 -3.803232e-03
## worst_perimeter         2.273502e-01      8.148585e-02 -3.016683e-02
## worst_area              3.198939e+00      1.196358e+00 -5.393287e-01
## worst_smoothness        1.151748e-04      3.017795e-05 -2.894072e-06
## worst_compactness       2.290961e-03      4.374838e-04  7.161077e-05
## worst_concavity         4.162923e-03      7.055574e-04  5.675643e-05
## worst_concave_points    8.697091e-04      2.436915e-04 -1.908251e-05
## worst_symmetry          3.634677e-04      5.350555e-05  1.966492e-04
## worst_fractal_dimension 2.385956e-04      3.442980e-05  1.109199e-05
##                         se_fractal_dimension  worst_radius worst_texture
## mean_radius                    -4.146920e-04  1.648070e+01  6.501412e+00
## mean_texture                    6.586149e-04  7.485610e+00  2.402127e+01
## mean_perimeter                 -4.863904e-04  1.135626e+02  4.579262e+01
## mean_area                      -2.001763e-02  1.634838e+03  6.280260e+02
## mean_smoothness                 1.047957e-05  1.415769e-02  3.448643e-03
## mean_compactness                7.028679e-05  1.340950e-01  8.324134e-02
## mean_concavity                  9.404932e-05  2.622474e-01  1.503095e-01
## mean_concave_points             2.607809e-05  1.544149e-01  7.139010e-02
## mean_symmetry                   2.385292e-05  2.367461e-02  1.619689e-02
## mean_fractal_dimension          1.281108e-05 -8.928486e-03 -1.994071e-03
## se_radius                       1.644951e-04  9.490021e-01  3.427805e-01
## se_texture                      4.103517e-04 -2.933045e-01  1.384605e+00
## se_perimeter                    1.284213e-03  6.733017e+00  2.578768e+00
## se_area                         1.484377e-02  1.649966e+02  5.670466e+01
## se_smoothness                   3.404143e-06 -3.343330e-03 -1.391226e-03
## se_compactness                  3.803125e-05  1.736165e-02  1.611529e-02
## se_concavity                    5.810841e-05  2.696517e-02  1.895267e-02
## se_concave_points               9.976860e-06  1.063349e-02  3.355469e-03
## se_symmetry                     8.047034e-06 -5.280606e-03 -3.803232e-03
## se_fractal_dimension            7.003880e-06 -5.188800e-04 -1.668750e-05
## worst_radius                   -5.188800e-04  2.325477e+01  1.084515e+01
## worst_texture                  -1.668750e-05  1.084515e+01  3.772001e+01
## worst_perimeter                -4.167831e-04  1.604270e+02  7.667725e+01
## worst_area                     -3.913711e-02  2.694301e+03  1.229169e+03
## worst_smoothness                1.019688e-05  2.346196e-02  3.213115e-02
## worst_compactness               1.609760e-04  3.558521e-01  3.556175e-01
## worst_concavity                 2.082661e-04  5.726996e-01  4.796577e-01
## worst_concave_points            3.685808e-05  2.481787e-01  1.478239e-01
## worst_symmetry                  1.749841e-05  7.021180e-02  9.127322e-02
## worst_fractal_dimension         2.816214e-05  7.613078e-03  2.488300e-02
##                         worst_perimeter    worst_area worst_smoothness
## mean_radius                1.139623e+02  1.883788e+03     9.438014e-03
## mean_texture               5.305461e+01  8.606878e+02     8.094079e-03
## mean_perimeter             7.895129e+02  1.298711e+04     8.204788e-02
## mean_area                  1.131450e+04  1.918354e+05     9.760221e-01
## mean_smoothness            1.100670e-01  1.613890e+00     2.578987e-04
## mean_compactness           1.025586e+00  1.500198e+01     6.740252e-04
## mean_concavity             1.928925e+00  3.031152e+01     8.072486e-04
## mean_concave_points        1.104569e+00  1.772095e+01     3.966553e-04
## mean_symmetry              1.939526e-01  2.648439e+00     2.643405e-04
## mean_fractal_dimension    -5.093144e-02 -9.656728e-01     8.070420e-05
## se_radius                  6.623880e+00  1.174838e+02     8.638162e-04
## se_texture                -1.856009e+00 -2.554974e+01    -9.129688e-04
## se_perimeter               4.829089e+01  8.311382e+02     5.712771e-03
## se_area                    1.150203e+03  2.082563e+04     1.245089e-01
## se_smoothness             -2.187521e-02 -3.107221e-01     2.162942e-05
## se_compactness             1.538251e-01  1.989014e+00     9.190075e-05
## se_concavity               2.273502e-01  3.198939e+00     1.151748e-04
## se_concave_points          8.148585e-02  1.196358e+00     3.017795e-05
## se_symmetry               -3.016683e-02 -5.393287e-01    -2.894072e-06
## se_fractal_dimension      -4.167831e-04 -3.913711e-02     1.019688e-05
## worst_radius               1.604270e+02  2.694301e+03     2.346196e-02
## worst_texture              7.667725e+01  1.229169e+03     3.213115e-02
## worst_perimeter            1.120555e+03  1.858030e+04     1.779042e-01
## worst_area                 1.858030e+04  3.224494e+05     2.663643e+00
## worst_smoothness           1.779042e-01  2.663643e+00     5.206670e-04
## worst_compactness          2.747660e+00  3.850478e+01     2.023059e-03
## worst_concavity            4.282344e+00  6.376714e+01     2.451114e-03
## worst_concave_points       1.785646e+00  2.771831e+01     8.154925e-04
## worst_symmetry             5.380056e-01  7.038101e+00     6.898575e-04
## worst_fractal_dimension    7.970614e-02  7.501753e-01     2.533044e-04
##                         worst_compactness worst_concavity worst_concave_points
## mean_radius                  2.268464e-01    3.850688e-01         1.716677e-01
## mean_texture                 1.948166e-01    2.775081e-01         8.601180e-02
## mean_perimeter               1.723145e+00    2.839595e+00         1.225807e+00
## mean_area                    2.140320e+01    3.743191e+01         1.663904e+01
## mean_smoothness              1.031305e-03    1.261252e-03         4.600052e-04
## mean_compactness             7.081106e-03    8.875002e-03         2.790102e-03
## mean_concavity               9.332580e-03    1.456572e-02         4.465145e-03
## mean_concave_points          4.010709e-03    6.025305e-03         2.299381e-03
## mean_symmetry                2.000497e-03    2.437763e-03         7.605824e-04
## mean_fractal_dimension       4.989919e-04    4.985273e-04         7.727167e-05
## se_radius                    1.204749e-02    2.152133e-02         9.513715e-03
## se_texture                  -7.810953e-03   -7.707941e-03        -4.262841e-03
## se_perimeter                 1.048013e-01    1.725605e-01         7.235189e-02
## se_area                      1.948788e+00    3.573414e+00         1.581957e+00
## se_smoothness               -2.582572e-05   -3.608367e-05        -1.999679e-05
## se_compactness               1.898788e-03    2.373811e-03         5.635329e-04
## se_concavity                 2.290961e-03    4.162923e-03         8.697091e-04
## se_concave_points            4.374838e-04    7.055574e-04         2.436915e-04
## se_symmetry                  7.161077e-05    5.675643e-05        -1.908251e-05
## se_fractal_dimension         1.609760e-04    2.082661e-04         3.685808e-05
## worst_radius                 3.558521e-01    5.726996e-01         2.481787e-01
## worst_texture                3.556175e-01    4.796577e-01         1.478239e-01
## worst_perimeter              2.747660e+00    4.282344e+00         1.785646e+00
## worst_area                   3.850478e+01    6.376714e+01         2.771831e+01
## worst_smoothness             2.023059e-03    2.451114e-03         8.154925e-04
## worst_compactness            2.449950e-02    2.901986e-02         8.189874e-03
## worst_concavity              2.901986e-02    4.325925e-02         1.163442e-02
## worst_concave_points         8.189874e-03    1.163442e-02         4.288187e-03
## worst_symmetry               5.867956e-03    6.753297e-03         2.001936e-03
## worst_fractal_dimension      2.281726e-03    2.564193e-03         5.985573e-04
##                         worst_symmetry worst_fractal_dimension
## mean_radius               3.464846e-02            2.119824e-04
## mean_texture              3.066769e-02            9.826686e-03
## mean_perimeter            2.755397e-01            2.052601e-02
## mean_area                 3.027352e+00            2.423656e-03
## mean_smoothness           3.370779e-04            1.256969e-04
## mean_compactness          1.617994e-03            6.461268e-04
## mean_concavity            1.959591e-03            7.296756e-04
## mean_concave_points       8.741219e-04            2.527607e-04
## mean_symmetry             1.170789e-03            2.137065e-04
## mean_fractal_dimension    1.413789e-04            9.703487e-05
## se_radius                 1.417677e-03            2.060610e-04
## se_texture               -4.289981e-03           -4.364398e-04
## se_perimeter              1.205598e-02            2.771839e-03
## se_area                   1.750265e-01            7.453815e-03
## se_smoothness            -1.978199e-05            5.552556e-06
## se_compactness            3.013360e-04            1.900289e-04
## se_concavity              3.634677e-04            2.385956e-04
## se_concave_points         5.350555e-05            3.442980e-05
## se_symmetry               1.966492e-04            1.109199e-05
## se_fractal_dimension      1.749841e-05            2.816214e-05
## worst_radius              7.021180e-02            7.613078e-03
## worst_texture             9.127322e-02            2.488300e-02
## worst_perimeter           5.380056e-01            7.970614e-02
## worst_area                7.038101e+00            7.501753e-01
## worst_smoothness          6.898575e-04            2.533044e-04
## worst_compactness         5.867956e-03            2.281726e-03
## worst_concavity           6.753297e-03            2.564193e-03
## worst_concave_points      2.001936e-03            5.985573e-04
## worst_symmetry            3.783260e-03            5.915541e-04
## worst_fractal_dimension   5.915541e-04            3.246261e-04
BC_data_d
##   [1]  36.667197  23.587986 112.518389  26.801116  15.316434  15.131692
##   [7]  21.579784  24.116227 144.087507  15.835311  31.651373 212.494449
##  [13]  18.990047  38.923422  40.834902  17.716272  26.852218  43.225484
##  [19]   8.807045  19.171669  16.083660  35.186789  42.512636  52.103396
##  [25]  55.330558  48.565139  17.372671  63.909825  19.527496  40.192355
##  [31]  74.196901  36.879241  47.557286  33.468788  53.488055  29.951792
##  [37]  33.076086 166.813504  45.940652  14.525002  29.581374  79.999212
##  [43]  20.065582  21.460275  31.930043  18.475442  21.314217  16.870705
##  [49]  12.537252   8.640141  11.081646   8.353036  32.279053  17.025551
##  [55]   9.718971  23.652443  29.441335  12.066498  34.906088  47.045653
##  [61]  36.480597  42.820648  39.089492  27.071896  18.132008  11.960760
##  [67]  12.964031 204.163191   9.053891  18.944406 180.997802  51.619864
##  [73]  18.007802   9.797171  20.309723  54.314039  50.261261 166.481675
##  [79]   8.093857  15.493664  31.914619  57.996254 121.789652  10.800407
##  [85]  18.464826  18.157440  81.503022   7.645095  21.477422   9.363843
##  [91]  25.858059  21.638014   7.953269  23.094160  18.060358  21.276553
##  [97]  20.001961  15.205335  12.366376  17.099828  31.140292  14.390717
## [103]  18.874653  14.195865  52.460176  13.671968  13.991013  87.703228
## [109]  28.728496  15.529098  21.498910  69.422699  23.224978  32.405429
## [115]  13.064464  78.902296  18.016960  35.561899  57.616343  17.114034
## [121]  18.576523 180.235051  12.055470  13.714251   8.534301  28.910988
## [127]  17.371286  34.041954  42.054084  16.034471  18.593046  13.499616
## [133]   7.780729  15.622984   9.000889  26.521176   9.556351  92.985574
## [139]  37.548288  15.186098  10.482433  13.709361   9.421971  10.618210
## [145]  35.583307  56.290588  20.750952  10.839386   8.484166  31.440429
## [151]  47.027318 407.281422   6.470556  19.372757   5.881398  25.051525
## [157]  26.483863   7.265333  17.616248  22.867060  45.738700  43.455413
## [163]   8.648729  50.769829   8.593120  14.682473   8.760763  31.297537
## [169]   8.719040   8.993878  23.303286  32.652732  41.841917  19.763072
## [175]  17.079488  51.439484  14.148564  12.738971  12.271847 114.836085
## [181]  57.345254  17.604365  24.018885  15.772771  25.848054  16.162110
## [187]  10.159596  15.181586  10.665832 136.565036  41.273304 179.751929
## [193]  24.230384  32.118070   5.906184  31.365338  26.031409  46.970267
## [199]  36.361764  13.164590  16.717662  75.846605  50.317964   6.926243
## [205]  21.395483  12.511838  11.641920  33.425179  12.219284  43.115379
## [211]   3.769314 384.270077 195.365979  31.711997  14.733200  14.519925
## [217]  15.843525  16.421448  57.097027  12.728694   8.770411   6.320684
## [223]  24.940159   6.222561  12.395297  11.382751  17.176997  17.049395
## [229]  22.830308  31.131396  22.339761  28.090667  19.690154  22.598415
## [235]   9.797729  56.434154  29.974650  25.330203  83.429866   5.911516
## [241]   9.136950  40.729016  13.334693  37.617394  23.966606  16.261485
## [247]  45.876739  14.113270   6.741288  34.373060   7.364382  63.590826
## [253]  11.003199  29.742042  10.247246  61.877214  42.358093 111.628859
## [259]  42.807409  23.653490  13.409193  36.715027  18.058459  22.320302
## [265] 147.417525  19.228770  10.102311  12.754618  19.096065  12.471804
## [271]   6.048037  41.184330  21.865571  15.064117  56.363470  11.799692
## [277]  18.509471   8.484269  10.617139  30.999484  33.582910  27.720768
## [283]  17.508010  17.887351   8.385739  24.323579  10.242419 118.602249
## [289]  10.771496 198.402167  17.535833  18.673926  10.766471   7.697537
## [295]   7.537412  24.081654  23.881691  11.839186  31.026455  24.968612
## [301]   7.952909  51.866444  10.260615   8.071643  12.137848  16.319935
## [307]  28.652340  12.549339   9.447631  10.533826   7.510773   6.536269
## [313]  13.530545 107.097743   9.268292  10.777252   9.974760  68.479207
## [319]  27.296647  21.947082  13.604140  15.753985  57.236231   6.620470
## [325]   8.528651   9.779115   8.813885  12.060779  42.137081  16.428167
## [331]  16.490720  23.299316  11.649334   8.839215  31.389514  23.581687
## [337]  31.590820  10.947917  36.982144   9.164293  24.936416  10.876828
## [343]  34.165419  11.406886  29.531503   7.835953  14.006836  16.579419
## [349]  30.104684   9.130813  46.707102  75.717427  23.998415  17.256451
## [355]  14.305175   9.899024  10.330235  31.373195  27.616237   9.082854
## [361]   8.197965   7.857608  10.007260   6.538524  18.127290  41.142837
## [367]   6.523646  89.634579  45.515606  39.313162  11.503723  38.582228
## [373]  28.126988   9.103849  23.334030  73.252582  17.295187  18.273431
## [379] 109.820092  12.999115  12.832716  31.206006  27.661093  14.340886
## [385]  15.341622   9.752638   9.621680  26.200652  54.915406   8.834377
## [391]  46.741969  24.013561  40.494086  12.419250  12.089921  12.766116
## [397]  12.800060   9.866949   8.198265  76.777954  17.756611  12.581550
## [403]  12.872430  12.708060  10.751121   9.015515  21.415281  15.320827
## [409]  14.776513  43.816799  12.451719  18.547662  15.296034  17.794555
## [415]  15.253142  21.200067  52.689119  16.940206  10.118723   9.088551
## [421]  27.849975  12.865884   8.376759  35.963997  11.145466   9.577649
## [427]  14.473193   7.807181   7.978864  50.930062  19.473940  25.915100
## [433]  24.886236   7.149467  14.478616   9.721094   6.210904   6.380264
## [439]   7.036898  25.867067  24.252014  13.010991  55.210978  21.194870
## [445]  20.251328  20.807840  14.134999  13.749539  32.082829  21.081001
## [451]  24.887721  10.204475  16.852411   8.850729  20.919542  18.513568
## [457]   9.613669   9.467330  17.094967  30.772828 301.538886  15.404104
## [463]  10.841414  15.010132  54.065043  13.973575  14.032799  52.048900
## [469]  19.310975  18.366408  27.613582  11.350126  45.287871  17.233547
## [475]  12.659231  16.026020  13.675411  12.431026  17.941804  13.376819
## [481]   8.327706  15.243519   8.703592  19.811809  49.615019  10.821895
## [487]  25.438088   8.963553  56.095954   8.985001  17.537813   8.963215
## [493]  25.366779  12.032657   9.425982  12.991169   8.909391  20.817665
## [499]  24.037451  25.401774  27.885619  12.078650  58.188371  90.067123
## [505] 104.870556  26.281934  32.450820  17.090232  27.618727  17.548288
## [511]   8.463969  19.937371   9.146607  10.502094   9.689786  13.054305
## [517]  32.144437  30.988341  11.264036  35.771796  47.583571   9.055134
## [523]  13.712336  14.966901  20.568718  22.782114  11.318284  65.565319
## [529]  10.084803  16.669169  10.116316  11.210394  23.195931  13.301509
## [535]  29.603703  19.438959  30.155492  41.581324  69.477383  15.318609
## [541]  16.361906  12.780377  15.195602  13.698070   6.761204   7.578989
## [547]  13.855770  11.194511  15.540137  17.957272  14.290149  15.486300
## [553]  20.903709  15.306381  25.422980  17.903320  46.345839  17.253330
## [559]  25.766159  19.811145  62.726243  96.214378  55.809378  41.154514
## [565]  20.525661  13.441639  63.874806  70.552489
# t-tests, one by one. Benign vs Malignant
with(data=BC_data,t.test(mean_radius[Diagnosis=="M"],mean_radius[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_radius[Diagnosis == "M"] and mean_radius[Diagnosis == "B"]
## t = 25.365, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  4.902325 5.725291
## sample estimates:
## mean of x mean of y 
##  17.46033  12.14652
## the p-value (=p-value < 2.2e-16) is less than the significance level (typically 0.05), we can reject the null hypothesis that the mean radius of the two groups is equal. In other words, we can conclude that there is a significant difference in the mean radius between the benign and malignant groups.

with(data=BC_data,t.test(mean_texture[Diagnosis=="M"],mean_texture[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_texture[Diagnosis == "M"] and mean_texture[Diagnosis == "B"]
## t = 11.079, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  3.079680 4.407004
## sample estimates:
## mean of x mean of y 
##  21.65810  17.91476
## the p-value (=p-value < 2.2e-16) is less than the significance level (typically 0.05), we can reject the null hypothesis that the mean_texture of the two groups is equal. In other words, we can conclude that there is a significant difference in the mean_texture between the benign and malignant groups.

with(data=BC_data,t.test(mean_perimeter[Diagnosis=="M"],mean_perimeter[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_perimeter[Diagnosis == "M"] and mean_perimeter[Diagnosis == "B"]
## t = 26.323, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  34.47484 40.03463
## sample estimates:
## mean of x mean of y 
## 115.33014  78.07541
## the p-value (=p-value < 2.2e-16) is less than the significance level (typically 0.05), we can reject the null hypothesis that the mean_perimeter of the two groups is equal. In other words, we can conclude that there is a significant difference in the mean_perimeter between the benign and malignant groups.

with(data=BC_data,t.test(mean_area[Diagnosis=="M"],mean_area[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_area[Diagnosis == "M"] and mean_area[Diagnosis == "B"]
## t = 23.877, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  473.0753 557.8827
## sample estimates:
## mean of x mean of y 
##  978.2692  462.7902
## the p-value (=p-value < 2.2e-16) is less than the significance level (typically 0.05), we can reject the null hypothesis that the mean_area of the two groups is equal. In other words, we can conclude that there is a significant difference in the mean_area between the benign and malignant groups.

with(data=BC_data,t.test(mean_smoothness[Diagnosis=="M"],mean_smoothness[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_smoothness[Diagnosis == "M"] and mean_smoothness[Diagnosis == "B"]
## t = 9.0713, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.008106919 0.012587834
## sample estimates:
##  mean of x  mean of y 
## 0.10282502 0.09247765
with(data=BC_data,t.test(mean_compactness[Diagnosis=="M"],mean_compactness[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_compactness[Diagnosis == "M"] and mean_compactness[Diagnosis == "B"]
## t = 17.639, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.05729614 0.07165509
## sample estimates:
##  mean of x  mean of y 
## 0.14456024 0.08008462
with(data=BC_data,t.test(mean_concavity[Diagnosis=="M"],mean_concavity[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_concavity[Diagnosis == "M"] and mean_concavity[Diagnosis == "B"]
## t = 23.038, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1043325 0.1237811
## sample estimates:
##  mean of x  mean of y 
## 0.16011441 0.04605762
with(data=BC_data,t.test(mean_concave_points[Diagnosis=="M"],mean_concave_points[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_concave_points[Diagnosis == "M"] and mean_concave_points[Diagnosis == "B"]
## t = 29.305, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.05783743 0.06614747
## sample estimates:
##  mean of x  mean of y 
## 0.08770986 0.02571741
with(data=BC_data,t.test(mean_symmetry[Diagnosis=="M"],mean_symmetry[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_symmetry[Diagnosis == "M"] and mean_symmetry[Diagnosis == "B"]
## t = 8.2416, df = 566, p-value = 1.188e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01408401 0.02289756
## sample estimates:
## mean of x mean of y 
## 0.1926768 0.1741860
##p-value = 1.188e-15
with(data=BC_data,t.test(mean_fractal_dimension[Diagnosis=="M"],mean_fractal_dimension[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  mean_fractal_dimension[Diagnosis == "M"] and mean_fractal_dimension[Diagnosis == "B"]
## t = -0.43066, df = 566, p-value = 0.6669
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0014639990  0.0009374555
## sample estimates:
##  mean of x  mean of y 
## 0.06260412 0.06286739
## p-value = 0.6669 In this case, the p-value is much larger than 0.05, indicating that we do not have enough evidence to reject the null hypothesis. Therefore, we fail to reject the null hypothesis and conclude that there is not enough evidence to support the alternative hypothesis.
with(data=BC_data,t.test(se_radius[Diagnosis=="M"],se_radius[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_radius[Diagnosis == "M"] and se_radius[Diagnosis == "B"]
## t = 16.307, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2838289 0.3615657
## sample estimates:
## mean of x mean of y 
## 0.6067796 0.2840824
## p-value < 2.2e-16
with(data=BC_data,t.test(se_texture[Diagnosis=="M"],se_texture[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_texture[Diagnosis == "M"] and se_texture[Diagnosis == "B"]
## t = -0.16712, df = 566, p-value = 0.8673
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.10224335  0.08620919
## sample estimates:
## mean of x mean of y 
##  1.212363  1.220380
##  p-value = 0.8673

with(data=BC_data,t.test(se_perimeter[Diagnosis=="M"],se_perimeter[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_perimeter[Diagnosis == "M"] and se_perimeter[Diagnosis == "B"]
## t = 15.849, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  2.017931 2.588858
## sample estimates:
## mean of x mean of y 
##  4.303716  2.000321
##  p-value < 2.2e-16
with(data=BC_data,t.test(se_area[Diagnosis=="M"],se_area[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_area[Diagnosis == "M"] and se_area[Diagnosis == "B"]
## t = 15.519, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  44.68016 57.62916
## sample estimates:
## mean of x mean of y 
##  72.28981  21.13515
##  p-value < 2.2e-16

with(data=BC_data,t.test(se_smoothness[Diagnosis=="M"],se_smoothness[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_smoothness[Diagnosis == "M"] and se_smoothness[Diagnosis == "B"]
## t = -1.5887, df = 566, p-value = 0.1127
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.258536e-04  9.785064e-05
## sample estimates:
##   mean of x   mean of y 
## 0.006781900 0.007195902
## p-value = 0.1127
with(data=BC_data,t.test(se_compactness[Diagnosis=="M"],se_compactness[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_compactness[Diagnosis == "M"] and se_compactness[Diagnosis == "B"]
## t = 7.2326, df = 566, p-value = 1.546e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.007840453 0.013686533
## sample estimates:
##  mean of x  mean of y 
## 0.03220174 0.02143825
## p-value = 1.546e-12
with(data=BC_data,t.test(se_concavity[Diagnosis=="M"],se_concavity[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_concavity[Diagnosis == "M"] and se_concavity[Diagnosis == "B"]
## t = 6.2101, df = 566, p-value = 1.026e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01078275 0.02075895
## sample estimates:
##  mean of x  mean of y 
## 0.04176758 0.02599674
## p-value = 1.026e-09
with(data=BC_data,t.test(se_concave_points[Diagnosis=="M"],se_concave_points[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_concave_points[Diagnosis == "M"] and se_concave_points[Diagnosis == "B"]
## t = 10.61, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.004236499 0.006161465
## sample estimates:
##   mean of x   mean of y 
## 0.015056635 0.009857653
## p-value < 2.2e-16
with(data=BC_data,t.test(se_symmetry[Diagnosis=="M"],se_symmetry[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_symmetry[Diagnosis == "M"] and se_symmetry[Diagnosis == "B"]
## t = -0.21818, df = 566, p-value = 0.8274
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001567401  0.001253996
## sample estimates:
##  mean of x  mean of y 
## 0.02042710 0.02058381
## p-value = 0.8274
with(data=BC_data,t.test(se_fractal_dimension[Diagnosis=="M"],se_fractal_dimension[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  se_fractal_dimension[Diagnosis == "M"] and se_fractal_dimension[Diagnosis == "B"]
## t = 1.815, df = 566, p-value = 0.07006
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.421657e-05  8.667302e-04
## sample estimates:
##   mean of x   mean of y 
## 0.004052308 0.003636051
## p-value = 0.07006

with(data=BC_data,t.test(worst_radius[Diagnosis=="M"],worst_radius[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_radius[Diagnosis == "M"] and worst_radius[Diagnosis == "B"]
## t = 29.244, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  7.215380 8.254401
## sample estimates:
## mean of x mean of y 
##  21.11469  13.37980
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_texture[Diagnosis=="M"],worst_texture[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_texture[Diagnosis == "M"] and worst_texture[Diagnosis == "B"]
## t = 12.374, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  4.929805 6.790103
## sample estimates:
## mean of x mean of y 
##  29.37502  23.51507
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_perimeter[Diagnosis=="M"],worst_perimeter[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_perimeter[Diagnosis == "M"] and worst_perimeter[Diagnosis == "B"]
## t = 29.895, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  50.60118 57.71785
## sample estimates:
## mean of x mean of y 
## 141.16545  87.00594
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_area[Diagnosis=="M"],worst_area[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_area[Diagnosis == "M"] and worst_area[Diagnosis == "B"]
## t = 25.631, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  794.6115 926.5062
## sample estimates:
## mean of x mean of y 
## 1419.4583  558.8994
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_smoothness[Diagnosis=="M"],worst_smoothness[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_smoothness[Diagnosis == "M"] and worst_smoothness[Diagnosis == "B"]
## t = 11.002, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01626794 0.02333904
## sample estimates:
## mean of x mean of y 
## 0.1447630 0.1249595
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_compactness[Diagnosis=="M"],worst_compactness[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_compactness[Diagnosis == "M"] and worst_compactness[Diagnosis == "B"]
## t = 17.36, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1691889 0.2123581
## sample estimates:
## mean of x mean of y 
## 0.3734460 0.1826725
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_concavity[Diagnosis=="M"],worst_concavity[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_concavity[Diagnosis == "M"] and worst_concavity[Diagnosis == "B"]
## t = 20.808, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2564038 0.3098552
## sample estimates:
## mean of x mean of y 
## 0.4493672 0.1662377
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_concave_points[Diagnosis=="M"],worst_concave_points[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_concave_points[Diagnosis == "M"] and worst_concave_points[Diagnosis == "B"]
## t = 30.987, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1005911 0.1142066
## sample estimates:
##  mean of x  mean of y 
## 0.18184318 0.07444434
## p-value < 2.2e-16

with(data=BC_data,t.test(worst_symmetry[Diagnosis=="M"],worst_symmetry[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_symmetry[Diagnosis == "M"] and worst_symmetry[Diagnosis == "B"]
## t = 10.801, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.04301350 0.06213538
## sample estimates:
## mean of x mean of y 
## 0.3228204 0.2702459
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_fractal_dimension[Diagnosis=="M"],worst_fractal_dimension[Diagnosis=="B"],var.equal=TRUE))
## 
##  Two Sample t-test
## 
## data:  worst_fractal_dimension[Diagnosis == "M"] and worst_fractal_dimension[Diagnosis == "B"]
## t = 8.0631, df = 566, p-value = 4.452e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.009045164 0.014871164
## sample estimates:
##  mean of x  mean of y 
## 0.09140024 0.07944207
##  p-value = 4.452e-15

library(Hotelling)
t2testBC_data <- hotelling.test(mean_radius + mean_texture + mean_perimeter + mean_area + mean_smoothness + mean_compactness + mean_concavity + mean_concave_points + mean_symmetry + mean_fractal_dimension + se_radius + se_texture + se_perimeter + se_area + se_smoothness + se_compactness + se_concavity + se_concave_points + se_symmetry + se_fractal_dimension + worst_radius + worst_texture + worst_perimeter + worst_area + worst_smoothness + worst_compactness + worst_concavity + worst_concave_points + worst_symmetry + worst_fractal_dimension ~ Diagnosis, data=BC_data)
t2testBC_data
## Test stat:  1934.8 
## Numerator df:  30 
## Denominator df:  537 
## P-value:  0
# Output of the function hotelling.test is given
cat("T2 statistic =",t2testBC_data$stat[[1]],"\n")
## T2 statistic = 1934.799
print(t2testBC_data)
## Test stat:  1934.8 
## Numerator df:  30 
## Denominator df:  537 
## P-value:  0
# The output of the hotelling.test function indicates that there is strong evidence against the null hypothesis, as the p-value is less than the significance level (alpha) of 0.05. Specifically, the output includes: The test statistic (T-squared): In this case, the test statistic is 1934.8. This measures the distance between the sample means of the groups relative to the within-group variability. A larger T-squared value indicates greater differences between the group means.The degrees of freedom: The numerator degrees of freedom is 30, and the denominator degrees of freedom is 537. These values depend on the number of groups being compared and the sample sizes of each group. The p-value: In this case, the p-value is 0. This is the probability of obtaining a test statistic as extreme or more extreme than the observed value, assuming the null hypothesis is true. A p-value of 0 indicates that the observed test statistic is very unlikely to have occurred by chance, and provides strong evidence against the null hypothesis. 

# Since the p-value is less than alpha, we reject the null hypothesis and conclude that there are significant differences between the group means. However, it's important to note that the interpretation of the results depends on the specific context and the hypothesis being tested.

# Levene's tests based on absolute differences around means using t-tests. Standarizing the data set with scale()

matstand <- scale(BC_data[,3:32])
matMalignant <- matstand[Diagnosis == "M",]

matBenign <- matstand[Diagnosis == "B",]
vecmedianbenign <- apply(matBenign, 2, median)


# in the above 2 represents column. Hence, we are asking for column median


vecmedianmalignant <- apply(matMalignant, 2, median)
matabsdevbenign <- abs(matBenign - matrix(rep(vecmedianbenign,nrow(matBenign)),nrow=nrow(matBenign), byrow=TRUE))

matabsdevmalignant <- abs(matMalignant - matrix(rep(vecmedianmalignant,nrow(matMalignant)),nrow=nrow(matMalignant), byrow=TRUE))



matabsdev.all <- rbind(matabsdevbenign,matabsdevmalignant)
matabsdev.all <- data.frame(Diagnosis, matabsdev.all)

t.test(matabsdev.all$mean_radius[Diagnosis == "M"],matabsdev.all$mean_radius[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_radius[Diagnosis == "M"] and matabsdev.all$mean_radius[Diagnosis == "B"]
## t = -1.7461, df = 566, p-value = 0.04067
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##          -Inf -0.003737858
## sample estimates:
## mean of x mean of y 
## 0.4845537 0.5507960
## The null hypothesis for this test is that the mean radius measurements of malignant tumor samples are equal to the mean radius measurements of benign tumor samples. The alternative hypothesis is that the mean radius measurements of malignant tumor samples are less than the mean radius measurements of benign tumor samples. 
#The p-value is 0.04067, which is less than the commonly used significance level of 0.05. This means that we can reject the null hypothesis and conclude that the mean radius measurements of malignant tumor samples are significantly less than the mean radius measurements of benign tumor samples.

#The 95 percent confidence interval for the difference in means is (-Inf, -0.003737858), which means that we can be 95 percent confident that the true difference in means is between negative infinity and -0.003737858.

#The sample estimates for the mean radius measurements of malignant tumor samples and benign tumor samples are 0.4845537 and 0.5507960, respectively. This means that, on average, the mean radius measurements of benign tumor samples are higher than those of malignant tumor samples.

t.test(matabsdev.all$mean_texture[Diagnosis == "M"],matabsdev.all$mean_texture[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_texture[Diagnosis == "M"] and matabsdev.all$mean_texture[Diagnosis == "B"]
## t = -1.7518, df = 566, p-value = 0.04018
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##          -Inf -0.005508984
## sample estimates:
## mean of x mean of y 
## 0.6195664 0.7121570
t.test(matabsdev.all$mean_perimeter[Diagnosis == "M"],matabsdev.all$mean_perimeter[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_perimeter[Diagnosis == "M"] and matabsdev.all$mean_perimeter[Diagnosis == "B"]
## t = -1.7273, df = 566, p-value = 0.04233
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##          -Inf -0.003014429
## sample estimates:
## mean of x mean of y 
## 0.4686461 0.5339428
t.test(matabsdev.all$mean_area[Diagnosis == "M"],matabsdev.all$mean_area[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_area[Diagnosis == "M"] and matabsdev.all$mean_area[Diagnosis == "B"]
## t = -2.6434, df = 566, p-value = 0.004218
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.04407463
## sample estimates:
## mean of x mean of y 
## 0.4184596 0.5354555
t.test(matabsdev.all$mean_smoothness[Diagnosis == "M"],matabsdev.all$mean_smoothness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_smoothness[Diagnosis == "M"] and matabsdev.all$mean_smoothness[Diagnosis == "B"]
## t = -0.1032, df = 566, p-value = 0.4589
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.07824574
## sample estimates:
## mean of x mean of y 
## 0.7335777 0.7388066
t.test(matabsdev.all$mean_compactness[Diagnosis == "M"],matabsdev.all$mean_compactness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_compactness[Diagnosis == "M"] and matabsdev.all$mean_compactness[Diagnosis == "B"]
## t = -2.0639, df = 566, p-value = 0.01974
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.02040952
## sample estimates:
## mean of x mean of y 
## 0.5290255 0.6301977
t.test(matabsdev.all$mean_concavity[Diagnosis == "M"],matabsdev.all$mean_concavity[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_concavity[Diagnosis == "M"] and matabsdev.all$mean_concavity[Diagnosis == "B"]
## t = -2.1737, df = 566, p-value = 0.01507
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.02493922
## sample estimates:
## mean of x mean of y 
## 0.4136701 0.5167088
t.test(matabsdev.all$mean_concave_points[Diagnosis == "M"],matabsdev.all$mean_concave_points[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_concave_points[Diagnosis == "M"] and matabsdev.all$mean_concave_points[Diagnosis == "B"]
## t = -1.1928, df = 566, p-value = 0.1167
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.01795869
## sample estimates:
## mean of x mean of y 
## 0.4093418 0.4564448
t.test(matabsdev.all$mean_symmetry[Diagnosis == "M"],matabsdev.all$mean_symmetry[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_symmetry[Diagnosis == "M"] and matabsdev.all$mean_symmetry[Diagnosis == "B"]
## t = -0.79298, df = 566, p-value = 0.2141
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.04569144
## sample estimates:
## mean of x mean of y 
## 0.6974537 0.7398520
t.test(matabsdev.all$mean_fractal_dimension[Diagnosis == "M"],matabsdev.all$mean_fractal_dimension[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$mean_fractal_dimension[Diagnosis == "M"] and matabsdev.all$mean_fractal_dimension[Diagnosis == "B"]
## t = -0.23818, df = 566, p-value = 0.4059
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.08549341
## sample estimates:
## mean of x mean of y 
## 0.7279925 0.7424409
t.test(matabsdev.all$se_radius[Diagnosis == "M"],matabsdev.all$se_radius[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_radius[Diagnosis == "M"] and matabsdev.all$se_radius[Diagnosis == "B"]
## t = -1.701, df = 566, p-value = 0.04475
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.00310138
## sample estimates:
## mean of x mean of y 
## 0.4440878 0.5428160
t.test(matabsdev.all$se_texture[Diagnosis == "M"],matabsdev.all$se_texture[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_texture[Diagnosis == "M"] and matabsdev.all$se_texture[Diagnosis == "B"]
## t = 1.0551, df = 566, p-value = 0.8541
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf 0.1666405
## sample estimates:
## mean of x mean of y 
## 0.7687200 0.7036643
t.test(matabsdev.all$se_perimeter[Diagnosis == "M"],matabsdev.all$se_perimeter[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_perimeter[Diagnosis == "M"] and matabsdev.all$se_perimeter[Diagnosis == "B"]
## t = -1.746, df = 566, p-value = 0.04068
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##          -Inf -0.005976637
## sample estimates:
## mean of x mean of y 
## 0.4270034 0.5330371
t.test(matabsdev.all$se_area[Diagnosis == "M"],matabsdev.all$se_area[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_area[Diagnosis == "M"] and matabsdev.all$se_area[Diagnosis == "B"]
## t = -2.3586, df = 566, p-value = 0.009342
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.04725161
## sample estimates:
## mean of x mean of y 
## 0.2844177 0.4411603
t.test(matabsdev.all$se_smoothness[Diagnosis == "M"],matabsdev.all$se_smoothness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_smoothness[Diagnosis == "M"] and matabsdev.all$se_smoothness[Diagnosis == "B"]
## t = 0.70231, df = 566, p-value = 0.7586
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf 0.1554153
## sample estimates:
## mean of x mean of y 
## 0.7071745 0.6607249
t.test(matabsdev.all$se_compactness[Diagnosis == "M"],matabsdev.all$se_compactness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_compactness[Diagnosis == "M"] and matabsdev.all$se_compactness[Diagnosis == "B"]
## t = -1.2939, df = 566, p-value = 0.09811
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.02338511
## sample estimates:
## mean of x mean of y 
## 0.5796344 0.6652031
t.test(matabsdev.all$se_concavity[Diagnosis == "M"],matabsdev.all$se_concavity[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_concavity[Diagnosis == "M"] and matabsdev.all$se_concavity[Diagnosis == "B"]
## t = -0.66881, df = 566, p-value = 0.2519
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.07200066
## sample estimates:
## mean of x mean of y 
## 0.4862588 0.5354594
t.test(matabsdev.all$se_concave_points[Diagnosis == "M"],matabsdev.all$se_concave_points[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_concave_points[Diagnosis == "M"] and matabsdev.all$se_concave_points[Diagnosis == "B"]
## t = 0.66471, df = 566, p-value = 0.7467
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf 0.1350191
## sample estimates:
## mean of x mean of y 
## 0.6563084 0.6174943
t.test(matabsdev.all$se_symmetry[Diagnosis == "M"],matabsdev.all$se_symmetry[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_symmetry[Diagnosis == "M"] and matabsdev.all$se_symmetry[Diagnosis == "B"]
## t = -0.87187, df = 566, p-value = 0.1918
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.05280659
## sample estimates:
## mean of x mean of y 
## 0.6308683 0.6902229
t.test(matabsdev.all$se_fractal_dimension[Diagnosis == "M"],matabsdev.all$se_fractal_dimension[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$se_fractal_dimension[Diagnosis == "M"] and matabsdev.all$se_fractal_dimension[Diagnosis == "B"]
## t = 1.2145, df = 566, p-value = 0.8875
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf 0.2114866
## sample estimates:
## mean of x mean of y 
## 0.6358429 0.5460992
t.test(matabsdev.all$worst_radius[Diagnosis == "M"],matabsdev.all$worst_radius[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_radius[Diagnosis == "M"] and matabsdev.all$worst_radius[Diagnosis == "B"]
## t = -2.5217, df = 566, p-value = 0.005976
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.03237456
## sample estimates:
## mean of x mean of y 
## 0.4091166 0.5025075
t.test(matabsdev.all$worst_texture[Diagnosis == "M"],matabsdev.all$worst_texture[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_texture[Diagnosis == "M"] and matabsdev.all$worst_texture[Diagnosis == "B"]
## t = -1.6553, df = 566, p-value = 0.04921
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##          -Inf -0.000375359
## sample estimates:
## mean of x mean of y 
## 0.6419832 0.7226258
t.test(matabsdev.all$worst_perimeter[Diagnosis == "M"],matabsdev.all$worst_perimeter[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_perimeter[Diagnosis == "M"] and matabsdev.all$worst_perimeter[Diagnosis == "B"]
## t = -2.6433, df = 566, p-value = 0.004219
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.03652691
## sample estimates:
## mean of x mean of y 
## 0.3979253 0.4948913
t.test(matabsdev.all$worst_area[Diagnosis == "M"],matabsdev.all$worst_area[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_area[Diagnosis == "M"] and matabsdev.all$worst_area[Diagnosis == "B"]
## t = -3.2923, df = 566, p-value = 0.0005278
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.07605857
## sample estimates:
## mean of x mean of y 
## 0.3403765 0.4926226
t.test(matabsdev.all$worst_smoothness[Diagnosis == "M"],matabsdev.all$worst_smoothness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_smoothness[Diagnosis == "M"] and matabsdev.all$worst_smoothness[Diagnosis == "B"]
## t = 0.019681, df = 566, p-value = 0.5078
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.08214453
## sample estimates:
## mean of x mean of y 
## 0.7099286 0.7089589
t.test(matabsdev.all$worst_compactness[Diagnosis == "M"],matabsdev.all$worst_compactness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_compactness[Diagnosis == "M"] and matabsdev.all$worst_compactness[Diagnosis == "B"]
## t = -1.8218, df = 566, p-value = 0.0345
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##          -Inf -0.008626281
## sample estimates:
## mean of x mean of y 
## 0.5217852 0.6119643
t.test(matabsdev.all$worst_concavity[Diagnosis == "M"],matabsdev.all$worst_concavity[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_concavity[Diagnosis == "M"] and matabsdev.all$worst_concavity[Diagnosis == "B"]
## t = -1.735, df = 566, p-value = 0.04165
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##          -Inf -0.004240361
## sample estimates:
## mean of x mean of y 
## 0.4767373 0.5609101
t.test(matabsdev.all$worst_concave_points[Diagnosis == "M"],matabsdev.all$worst_concave_points[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_concave_points[Diagnosis == "M"] and matabsdev.all$worst_concave_points[Diagnosis == "B"]
## t = -1.2311, df = 566, p-value = 0.1094
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.01365571
## sample estimates:
## mean of x mean of y 
## 0.4518694 0.4922350
t.test(matabsdev.all$worst_symmetry[Diagnosis == "M"],matabsdev.all$worst_symmetry[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_symmetry[Diagnosis == "M"] and matabsdev.all$worst_symmetry[Diagnosis == "B"]
## t = -1.0991, df = 566, p-value = 0.1361
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.03033607
## sample estimates:
## mean of x mean of y 
## 0.6238138 0.6846072
t.test(matabsdev.all$worst_fractal_dimension[Diagnosis == "M"],matabsdev.all$worst_fractal_dimension[Diagnosis == "B"], alternative="less",var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  matabsdev.all$worst_fractal_dimension[Diagnosis == "M"] and matabsdev.all$worst_fractal_dimension[Diagnosis == "B"]
## t = -1.2337, df = 566, p-value = 0.1089
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.02436987
## sample estimates:
## mean of x mean of y 
## 0.6348451 0.7074898
matstand.all <- data.frame(Diagnosis, matstand)

colnames(matstand.all) <- colnames(BC_data[2:32])
t2testcdata <- hotelling.test(mean_radius + mean_texture + mean_perimeter + mean_area + mean_smoothness + mean_compactness + mean_concavity + mean_concave_points + mean_symmetry + mean_fractal_dimension + se_radius + se_texture + se_perimeter + se_area + se_smoothness + se_compactness + se_concavity + se_concave_points + se_symmetry + se_fractal_dimension + worst_radius + worst_texture + worst_perimeter + worst_area + worst_smoothness + worst_compactness + worst_concavity + worst_concave_points + worst_symmetry + worst_fractal_dimension ~ Diagnosis, data=matstand.all)
cat("T2 statistic =",t2testcdata$stat[[1]],"\n")
## T2 statistic = 1934.799
print(t2testcdata)
## Test stat:  1934.8 
## Numerator df:  30 
## Denominator df:  537 
## P-value:  0
# In the above we standardized using scale function
#matabsdev.all

# We can also look at Van Valen's test. Equivalent to the comparison of mean absolute median
# diferences between two groups. In the sparrows' example, the Van Valen's test
# is one-sided (Mean dij for survivors < Mean dij for non-survivors)
# dij is the norm of the individual vector i composed by the absolute
# deviations computed for all the variables in sample j.
# These norms define the second column of the data frame d.all

d.all <- data.frame(Diagnosis,sqrt(rowSums(matabsdev.all[,-1]^2)))
#d.all
colnames(d.all)[2] <- "dij"
#d.all
head(d.all)
##   Diagnosis      dij
## 1         M 2.497548
## 2         M 2.764340
## 3         M 3.071548
## 4         M 3.689073
## 5         M 2.993284
## 6         M 2.200929
with(d.all, t.test(dij[Diagnosis=="M"], dij[Diagnosis=="B"],var.equal=TRUE, alternative="less"))
## 
##  Two Sample t-test
## 
## data:  dij[Diagnosis == "M"] and dij[Diagnosis == "B"]
## t = -1.6824, df = 566, p-value = 0.04652
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##          -Inf -0.006935767
## sample estimates:
## mean of x mean of y 
##  3.897171  4.231733
sprintf("d-values for Malignant: Mean = %2.3f, Variance = %2.3f",mean(d.all$dij[Diagnosis=="M"]),var(d.all$dij[Diagnosis=="M"]))
## [1] "d-values for Malignant: Mean = 3.897, Variance = 4.900"
sprintf("d-values for Benign: Mean = %2.3f, Variance = %2.3f",mean(d.all$dij[Diagnosis=="B"]),var(d.all$dij[Diagnosis=="B"]))
## [1] "d-values for Benign: Mean = 4.232, Variance = 5.448"
# Hotelling Test


# Leverne test is used to verify Homoscedasticity. It tests if the variance of two samples are # #equal. Levene's test is an inferential statistic used to assess the equality of variances for a #variable calculated for two or more groups.[1] Some common statistical procedures assume that #variances of the populations from which different samples are drawn are equal. Levene's test #assesses this assumption.



library(car)
#leveneTest() produces a two-sided test
leveneTest(mean_radius ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  91.591 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_texture ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  1.0103 0.3153
##       566
leveneTest(mean_perimeter ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1   91.88 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_area ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  171.64 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_smoothness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.9135 0.3396
##       566
leveneTest(mean_compactness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  37.938 1.386e-09 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_concavity ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)    
## group   1  68.567 8.88e-16 ***
##       566                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_concave_points ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  92.935 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_symmetry ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  1.7553 0.1858
##       566
leveneTest(mean_fractal_dimension ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1  5.5127 0.01922 *
##       566                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_radius ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  108.61 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_texture ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1  5.5316 0.01902 *
##       566                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_perimeter ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  91.937 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_area ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1   116.6 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_smoothness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1  4.5962 0.03247 *
##       566                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_compactness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1  3.6443 0.05677 .
##       566                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_concavity ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1   0.028 0.8672
##       566
leveneTest(se_concave_points ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.1342 0.7143
##       566
leveneTest(se_symmetry ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1   2.974 0.08516 .
##       566                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_fractal_dimension ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.5963 0.4403
##       566
leveneTest(worst_radius ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  131.08 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_texture ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.1699 0.6804
##       566
leveneTest(worst_perimeter ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1   125.8 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_area ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  194.51 < 2.2e-16 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_smoothness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.5165 0.4726
##       566
leveneTest(worst_compactness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1   58.72 7.939e-14 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_concavity ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  18.666 1.839e-05 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_concave_points ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  13.357 0.0002815 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_symmetry ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  37.331 1.856e-09 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_fractal_dimension ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  34.663 6.721e-09 ***
##       566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA

summary(aov(mean_radius ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1   3745    3745   643.4 <2e-16 ***
## Residuals   566   3294       6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_texture ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1   1858  1858.3   122.7 <2e-16 ***
## Residuals   566   8570    15.1                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_perimeter ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1 184063  184063   692.9 <2e-16 ***
## Residuals   566 150355     266                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_area ~ Diagnosis))
##              Df   Sum Sq  Mean Sq F value Pr(>F)    
## Diagnosis     1 35239058 35239058   570.1 <2e-16 ***
## Residuals   566 34984077    61809                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_smoothness ~ Diagnosis))
##              Df  Sum Sq  Mean Sq F value Pr(>F)    
## Diagnosis     1 0.01420 0.014199   82.29 <2e-16 ***
## Residuals   566 0.09766 0.000173                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_compactness ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1 0.5513  0.5513   311.1 <2e-16 ***
## Residuals   566 1.0029  0.0018                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_concavity ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1  1.725  1.7252   530.7 <2e-16 ***
## Residuals   566  1.840  0.0033                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_concave_points ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1 0.5097  0.5097   858.8 <2e-16 ***
## Residuals   566 0.3359  0.0006                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_symmetry ~ Diagnosis))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Diagnosis     1 0.0453 0.04534   67.92 1.19e-15 ***
## Residuals   566 0.3778 0.00067                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_fractal_dimension ~ Diagnosis))
##              Df   Sum Sq   Mean Sq F value Pr(>F)
## Diagnosis     1 0.000009 9.190e-06   0.185  0.667
## Residuals   566 0.028051 4.956e-05
summary(aov(se_radius ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1  13.81  13.810   265.9 <2e-16 ***
## Residuals   566  29.39   0.052                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_texture ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis     1   0.01 0.00852   0.028  0.867
## Residuals   566 172.75 0.30520
summary(aov(se_perimeter ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1  703.6   703.6   251.2 <2e-16 ***
## Residuals   566 1585.5     2.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_area ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1 347035  347035   240.8 <2e-16 ***
## Residuals   566 815597    1441                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_smoothness ~ Diagnosis))
##              Df   Sum Sq   Mean Sq F value Pr(>F)
## Diagnosis     1 0.000023 2.273e-05   2.524  0.113
## Residuals   566 0.005097 9.006e-06
summary(aov(se_compactness ~ Diagnosis))
##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## Diagnosis     1 0.01536 0.015364   52.31 1.55e-12 ***
## Residuals   566 0.16624 0.000294                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_concavity ~ Diagnosis))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Diagnosis     1 0.0330 0.03298   38.56 1.03e-09 ***
## Residuals   566 0.4841 0.00086                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_concave_points ~ Diagnosis))
##              Df   Sum Sq  Mean Sq F value Pr(>F)    
## Diagnosis     1 0.003585 0.003585   112.6 <2e-16 ***
## Residuals   566 0.018024 0.000032                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_symmetry ~ Diagnosis))
##              Df  Sum Sq   Mean Sq F value Pr(>F)
## Diagnosis     1 0.00000 3.260e-06   0.048  0.827
## Residuals   566 0.03872 6.841e-05
summary(aov(se_fractal_dimension ~ Diagnosis))
##              Df   Sum Sq   Mean Sq F value Pr(>F)  
## Diagnosis     1 0.000023 2.298e-05   3.294 0.0701 .
## Residuals   566 0.003948 6.976e-06                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_radius ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1   7934    7934   855.2 <2e-16 ***
## Residuals   566   5251       9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_texture ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1   4554    4554   153.1 <2e-16 ***
## Residuals   566  16833      30                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_perimeter ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1 389002  389002   893.7 <2e-16 ***
## Residuals   566 246353     435                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_area ~ Diagnosis))
##              Df   Sum Sq  Mean Sq F value Pr(>F)    
## Diagnosis     1 98211759 98211759   656.9 <2e-16 ***
## Residuals   566 84617041   149500                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_smoothness ~ Diagnosis))
##              Df  Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1 0.05201 0.05201     121 <2e-16 ***
## Residuals   566 0.24321 0.00043                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_compactness ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1  4.827   4.827   301.4 <2e-16 ***
## Residuals   566  9.065   0.016                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_concavity ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1  10.63  10.631     433 <2e-16 ***
## Residuals   566  13.90   0.025                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_concave_points ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1 1.5297  1.5297   960.2 <2e-16 ***
## Residuals   566 0.9017  0.0016                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_symmetry ~ Diagnosis))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diagnosis     1 0.3666  0.3666   116.7 <2e-16 ***
## Residuals   566 1.7785  0.0031                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_fractal_dimension ~ Diagnosis))
##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## Diagnosis     1 0.01896 0.018964   65.01 4.45e-15 ***
## Residuals   566 0.16510 0.000292                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# mahalanobis
library(stats)
BC_data_MD <- mahalanobis(BC_data_x, BC_data_cm, BC_data_S)
#BC_data_MD
BC_data$pvalues <- pchisq(BC_data_MD, df=3, lower.tail=FALSE)
#BC_data

# BoxM

library(biotools)
boxM(BC_data[,3:32],Diagnosis)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  BC_data[, 3:32]
## Chi-Sq (approx.) = 7018, df = 465, p-value < 2.2e-16
# The Box's M-test for Homogeneity of Covariance Matrices is a statistical test used to determine whether the covariance matrices of two or more groups are equal

# The test statistic for Box's M-test follows a chi-squared distribution with degrees of freedom equal to the product of the number of groups and the number of variables. In the output, the test statistic is reported as Chi-Sq (approx.) = 7018, with degrees of freedom (df) = 465. This indicates that there are 465 variables in the data and that the test has been performed on multiple groups.

#The p-value for the test is  < 2.2e-16, which means that the observed test statistic is highly significant at conventional levels of significance. This indicates strong evidence against the null hypothesis of homogeneity of covariance matrices, meaning that the covariance matrices for the groups are not equal.

#Therefore, based on the results of the Box's M-test, it can be inferred that there is evidence of significant differences in the covariance matrices between the groups in the data.


# MANOVA
summary(manova(as.matrix(BC_data[,-2])~ Diagnosis))
##            Df  Pillai approx F num Df den Df    Pr(>F)    
## Diagnosis   1 0.77871   58.833     32    535 < 2.2e-16 ***
## Residuals 566                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The output shows the results of the MANOVA with respect to the effect of the Diagnosis variable on the dependent variables.

#The first row of the output shows the results of the MANOVA, with the Pillai's trace statistic being 0.77383 and the approximate F statistic being 59.158. The denominator degrees of freedom (den Df) are 536, indicating that there are 536 observations in the dataset. The null hypothesis of the MANOVA is that there are no differences in the means of the dependent variables between the groups defined by the Diagnosis variable. 

#The p-value for the MANOVA is reported as < 2.2e-16, which is highly significant. implying a strong evidence against the null hypothesis, indicating that there are significant differences in the means of the dependent variables between the Malignant and Benign groups defined by the Diagnosis variable. 

#The second row of the output shows the results of the residuals, which indicate the variability in the dependent variables that is not explained by the Diagnosis variable. The residual degrees of freedom are 566, indicating that there are 566 observations in the dataset that are not accounted for by the Diagnosis variable.

# In summary, based on the results of the MANOVA, it can be inferred that there are significant differences in the means of the dependent variables between the groups defined by the Diagnosis variable in the BC_data dataset.

## PCA
#cor(BC_data[-2])
bca_pca <- prcomp(BC_data[,-2],scale=TRUE)
#bca_pca
summary(bca_pca)
## Importance of components:
##                           PC1    PC2     PC3    PC4     PC5     PC6     PC7
## Standard deviation     3.6524 2.3960 1.69017 1.4109 1.28656 1.11383 0.99173
## Proportion of Variance 0.4169 0.1794 0.08927 0.0622 0.05173 0.03877 0.03074
## Cumulative Proportion  0.4169 0.5963 0.68555 0.7478 0.79948 0.83825 0.86898
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.92636 0.81702 0.67952 0.63468 0.59112 0.54334 0.51050
## Proportion of Variance 0.02682 0.02086 0.01443 0.01259 0.01092 0.00923 0.00814
## Cumulative Proportion  0.89580 0.91666 0.93109 0.94367 0.95459 0.96382 0.97196
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.49270 0.39480 0.30692 0.28141 0.24355 0.22812 0.22244
## Proportion of Variance 0.00759 0.00487 0.00294 0.00247 0.00185 0.00163 0.00155
## Cumulative Proportion  0.97955 0.98442 0.98736 0.98984 0.99169 0.99332 0.99487
##                           PC22    PC23    PC24    PC25    PC26    PC27    PC28
## Standard deviation     0.17626 0.17353 0.16603 0.15617 0.13476 0.12484 0.09023
## Proportion of Variance 0.00097 0.00094 0.00086 0.00076 0.00057 0.00049 0.00025
## Cumulative Proportion  0.99584 0.99678 0.99764 0.99840 0.99897 0.99946 0.99971
##                           PC29    PC30    PC31    PC32
## Standard deviation     0.08256 0.03995 0.02734 0.01135
## Proportion of Variance 0.00021 0.00005 0.00002 0.00000
## Cumulative Proportion  0.99992 0.99997 1.00000 1.00000
(eigen_bca <- bca_pca$sdev^2)
##  [1] 1.334011e+01 5.740679e+00 2.856671e+00 1.990515e+00 1.655248e+00
##  [6] 1.240621e+00 9.835331e-01 8.581344e-01 6.675166e-01 4.617481e-01
## [11] 4.028135e-01 3.494252e-01 2.952150e-01 2.606132e-01 2.427493e-01
## [16] 1.558668e-01 9.419889e-02 7.918881e-02 5.931593e-02 5.203975e-02
## [21] 4.948143e-02 3.106843e-02 3.011234e-02 2.756705e-02 2.439053e-02
## [26] 1.816117e-02 1.558456e-02 8.140586e-03 6.816197e-03 1.596090e-03
## [31] 7.474333e-04 1.288493e-04
names(eigen_bca) <- paste("PC",1:8,sep="")
#eigen_bca
sumlambdas <- sum(eigen_bca)
#sumlambdas
propvar <- eigen_bca/sumlambdas
#propvar
cumvar_bca <- cumsum(propvar)
#cumvar_bca
matlambdas <- rbind(eigen_bca,propvar,cumvar_bca)
rownames(matlambdas) <- c("Eigenvalues","Prop. variance","Cum. prop. variance")
round(matlambdas,4)
##                         PC1    PC2    PC3    PC4    PC5    PC6    PC7    PC8
## Eigenvalues         13.3401 5.7407 2.8567 1.9905 1.6552 1.2406 0.9835 0.8581
## Prop. variance       0.4169 0.1794 0.0893 0.0622 0.0517 0.0388 0.0307 0.0268
## Cum. prop. variance  0.4169 0.5963 0.6855 0.7477 0.7995 0.8382 0.8690 0.8958
##                       <NA>   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>
## Eigenvalues         0.6675 0.4617 0.4028 0.3494 0.2952 0.2606 0.2427 0.1559
## Prop. variance      0.0209 0.0144 0.0126 0.0109 0.0092 0.0081 0.0076 0.0049
## Cum. prop. variance 0.9167 0.9311 0.9437 0.9546 0.9638 0.9720 0.9795 0.9844
##                       <NA>   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>
## Eigenvalues         0.0942 0.0792 0.0593 0.0520 0.0495 0.0311 0.0301 0.0276
## Prop. variance      0.0029 0.0025 0.0019 0.0016 0.0015 0.0010 0.0009 0.0009
## Cum. prop. variance 0.9874 0.9898 0.9917 0.9933 0.9949 0.9958 0.9968 0.9976
##                       <NA>   <NA>   <NA>   <NA>   <NA>   <NA>  <NA>  <NA>
## Eigenvalues         0.0244 0.0182 0.0156 0.0081 0.0068 0.0016 7e-04 1e-04
## Prop. variance      0.0008 0.0006 0.0005 0.0003 0.0002 0.0000 0e+00 0e+00
## Cum. prop. variance 0.9984 0.9990 0.9995 0.9997 0.9999 1.0000 1e+00 1e+00
summary(bca_pca)
## Importance of components:
##                           PC1    PC2     PC3    PC4     PC5     PC6     PC7
## Standard deviation     3.6524 2.3960 1.69017 1.4109 1.28656 1.11383 0.99173
## Proportion of Variance 0.4169 0.1794 0.08927 0.0622 0.05173 0.03877 0.03074
## Cumulative Proportion  0.4169 0.5963 0.68555 0.7478 0.79948 0.83825 0.86898
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.92636 0.81702 0.67952 0.63468 0.59112 0.54334 0.51050
## Proportion of Variance 0.02682 0.02086 0.01443 0.01259 0.01092 0.00923 0.00814
## Cumulative Proportion  0.89580 0.91666 0.93109 0.94367 0.95459 0.96382 0.97196
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.49270 0.39480 0.30692 0.28141 0.24355 0.22812 0.22244
## Proportion of Variance 0.00759 0.00487 0.00294 0.00247 0.00185 0.00163 0.00155
## Cumulative Proportion  0.97955 0.98442 0.98736 0.98984 0.99169 0.99332 0.99487
##                           PC22    PC23    PC24    PC25    PC26    PC27    PC28
## Standard deviation     0.17626 0.17353 0.16603 0.15617 0.13476 0.12484 0.09023
## Proportion of Variance 0.00097 0.00094 0.00086 0.00076 0.00057 0.00049 0.00025
## Cumulative Proportion  0.99584 0.99678 0.99764 0.99840 0.99897 0.99946 0.99971
##                           PC29    PC30    PC31    PC32
## Standard deviation     0.08256 0.03995 0.02734 0.01135
## Proportion of Variance 0.00021 0.00005 0.00002 0.00000
## Cumulative Proportion  0.99992 0.99997 1.00000 1.00000
#bca_pca$rotation
#print(bca_pca)
## Sample scores stored in sparrow_pca$x
#bca_pca$x
# Identifying the scores by their survival status
bcatyp_pca <- cbind(data.frame(Diagnosis),bca_pca$x)
#bcatyp_pca
# Means of scores for all the PC's classified by Survival status
tabmeansPC <- aggregate(bcatyp_pca[,2:33],by=list(Diagnosis=BC_data$Diagnosis),mean)
#tabmeansPC
tabmeansPC <- tabmeansPC[rev(order(tabmeansPC$Diagnosis)),]
#tabmeansPC
tabfmeans <- t(tabmeansPC[,-1])
#tabfmeans
colnames(tabfmeans) <- t(as.vector(tabmeansPC[1]$Diagnosis))
#tabfmeans
# Standard deviations of scores for all the PC's classified by Survival status
tabsdsPC <- aggregate(bcatyp_pca[,2:33],by=list(Diagnosis=BC_data$Diagnosis),sd)
tabfsds <- t(tabsdsPC[,-1])
colnames(tabfsds) <- t(as.vector(tabsdsPC[1]$Diagnosis))
#tabfsds
t.test(PC1~BC_data$Diagnosis,data=bcatyp_pca) 
## 
##  Welch Two Sample t-test
## 
## data:  PC1 by BC_data$Diagnosis
## t = -26.369, df = 286.41, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -6.382975 -5.496271
## sample estimates:
## mean in group B mean in group M 
##       -2.206444        3.733178
##p-value < 2.2e-16
t.test(PC2~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC2 by BC_data$Diagnosis
## t = 4.222, df = 356.65, p-value = 3.077e-05
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  0.4930636 1.3529570
## sample estimates:
## mean in group B mean in group M 
##       0.3428788      -0.5801315
t.test(PC3~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC3 by BC_data$Diagnosis
## t = 3.6262, df = 319.6, p-value = 0.0003346
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  0.2659600 0.8968407
## sample estimates:
## mean in group B mean in group M 
##       0.2159780      -0.3654224
t.test(PC4~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC4 by BC_data$Diagnosis
## t = 3.2183, df = 452.33, p-value = 0.001382
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  0.1510156 0.6247041
## sample estimates:
## mean in group B mean in group M 
##       0.1440818      -0.2437781
t.test(PC5~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC5 by BC_data$Diagnosis
## t = -2.5166, df = 534.03, p-value = 0.01214
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.46426316 -0.05721288
## sample estimates:
## mean in group B mean in group M 
##     -0.09685867      0.16387935
t.test(PC6~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC6 by BC_data$Diagnosis
## t = 0.15771, df = 337.54, p-value = 0.8748
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.1904591  0.2236619
## sample estimates:
## mean in group B mean in group M 
##      0.00616707     -0.01043433
t.test(PC7~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC7 by BC_data$Diagnosis
## t = 0.70574, df = 409.97, p-value = 0.4808
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.1110673  0.2354837
## sample estimates:
## mean in group B mean in group M 
##      0.02310902     -0.03909915
t.test(PC8~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC8 by BC_data$Diagnosis
## t = 1.5201, df = 485.35, p-value = 0.1291
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.02918548  0.22869557
## sample estimates:
## mean in group B mean in group M 
##      0.03705689     -0.06269815
t.test(PC9~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC9 by BC_data$Diagnosis
## t = -1.673, df = 306.95, p-value = 0.09536
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.28912604  0.02340931
## sample estimates:
## mean in group B mean in group M 
##     -0.04935408      0.08350429
t.test(PC10~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC10 by BC_data$Diagnosis
## t = -1.4525, df = 414.02, p-value = 0.1471
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.20550940  0.03085812
## sample estimates:
## mean in group B mean in group M 
##     -0.03243963      0.05488601
t.test(PC11~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC11 by BC_data$Diagnosis
## t = 0.64122, df = 367.72, p-value = 0.5218
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.07733515  0.15217372
## sample estimates:
## mean in group B mean in group M 
##      0.01390047     -0.02351881
t.test(PC12~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC12 by BC_data$Diagnosis
## t = -0.063141, df = 456.18, p-value = 0.9497
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.10304968  0.09663385
## sample estimates:
## mean in group B mean in group M 
##    -0.001191673     0.002016243
t.test(PC13~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC13 by BC_data$Diagnosis
## t = -0.27121, df = 435.23, p-value = 0.7864
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.1060410  0.0803243
## sample estimates:
## mean in group B mean in group M 
##    -0.004776610     0.008081752
t.test(PC14~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC14 by BC_data$Diagnosis
## t = -0.068032, df = 440.9, p-value = 0.9458
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.09021088  0.08417441
## sample estimates:
## mean in group B mean in group M 
##    -0.001121210     0.001897024
t.test(PC15~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC15 by BC_data$Diagnosis
## t = 2.0301, df = 301.31, p-value = 0.04322
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  0.002996614 0.192382768
## sample estimates:
## mean in group B mean in group M 
##      0.03628966     -0.06140003
t.test(PC16~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC16 by BC_data$Diagnosis
## t = 1.7652, df = 412.53, p-value = 0.07826
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.007004857  0.130353268
## sample estimates:
## mean in group B mean in group M 
##      0.02291066     -0.03876354
t.test(PC17~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC17 by BC_data$Diagnosis
## t = 2.3717, df = 353.93, p-value = 0.01824
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  0.01150734 0.12325890
## sample estimates:
## mean in group B mean in group M 
##      0.02503140     -0.04235171
t.test(PC18~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC18 by BC_data$Diagnosis
## t = 0.05802, df = 390.24, p-value = 0.9538
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.04848328  0.05143186
## sample estimates:
## mean in group B mean in group M 
##    0.0005476693   -0.0009266253
t.test(PC19~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC19 by BC_data$Diagnosis
## t = -0.58216, df = 287.14, p-value = 0.5609
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.06194417  0.03366554
## sample estimates:
## mean in group B mean in group M 
##    -0.005252457     0.008886858
t.test(PC20~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC20 by BC_data$Diagnosis
## t = 1.5089, df = 291.38, p-value = 0.1324
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.01037032  0.07852305
## sample estimates:
## mean in group B mean in group M 
##      0.01265865     -0.02141771
t.test(PC21~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC21 by BC_data$Diagnosis
## t = 1.2616, df = 300.8, p-value = 0.2081
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.01539713  0.07039938
## sample estimates:
## mean in group B mean in group M 
##      0.01021609     -0.01728504
t.test(PC22~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC22 by BC_data$Diagnosis
## t = 1.2681, df = 336.19, p-value = 0.2056
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.01163875  0.05387201
## sample estimates:
## mean in group B mean in group M 
##     0.007844382    -0.013272248
t.test(PC23~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC23 by BC_data$Diagnosis
## t = 0.99216, df = 377.55, p-value = 0.3218
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.01540649  0.04679034
## sample estimates:
## mean in group B mean in group M 
##     0.005829218    -0.009862706
t.test(PC24~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC24 by BC_data$Diagnosis
## t = 0.59476, df = 350.74, p-value = 0.5524
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.02126002  0.03969238
## sample estimates:
## mean in group B mean in group M 
##     0.003423616    -0.005792563
t.test(PC25~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC25 by BC_data$Diagnosis
## t = -0.50442, df = 302.95, p-value = 0.6143
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.03780270  0.02237662
## sample estimates:
## mean in group B mean in group M 
##    -0.002865232     0.004847809
t.test(PC26~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC26 by BC_data$Diagnosis
## t = -1.6381, df = 319.92, p-value = 0.1024
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.046592993  0.004255987
## sample estimates:
## mean in group B mean in group M 
##    -0.007863652     0.013304851
t.test(PC27~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC27 by BC_data$Diagnosis
## t = 0.18981, df = 305.6, p-value = 0.8496
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.02167276  0.02630018
## sample estimates:
## mean in group B mean in group M 
##    0.0008594953   -0.0014542171
t.test(PC28~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC28 by BC_data$Diagnosis
## t = 1.2535, df = 304.81, p-value = 0.211
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.006287387  0.028356092
## sample estimates:
## mean in group B mean in group M 
##     0.004099029    -0.006935324
t.test(PC29~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC29 by BC_data$Diagnosis
## t = 0.77703, df = 310.83, p-value = 0.4377
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.009537637  0.021986910
## sample estimates:
## mean in group B mean in group M 
##     0.002312321    -0.003912316
t.test(PC30~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC30 by BC_data$Diagnosis
## t = 0.28563, df = 263, p-value = 0.7754
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.006925811  0.009276098
## sample estimates:
## mean in group B mean in group M 
##    0.0004365410   -0.0007386026
t.test(PC31~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC31 by BC_data$Diagnosis
## t = 0.89066, df = 269.07, p-value = 0.3739
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.003007261  0.007975776
## sample estimates:
## mean in group B mean in group M 
##    0.0009228492   -0.0015614084
t.test(PC32~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  Welch Two Sample t-test
## 
## data:  PC32 by BC_data$Diagnosis
## t = 0.52676, df = 281, p-value = 0.5988
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
##  -0.001644605  0.002846424
## sample estimates:
## mean in group B mean in group M 
##    0.0002232252   -0.0003776844
## F ratio tests
var.test(PC1~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC1 by BC_data$Diagnosis
## F = 0.30251, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2366496 0.3837642
## sample estimates:
## ratio of variances 
##          0.3025121
var.test(PC2~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC2 by BC_data$Diagnosis
## F = 0.59095, num df = 356, denom df = 210, p-value = 1.327e-05
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4622905 0.7496760
## sample estimates:
## ratio of variances 
##          0.5909516
var.test(PC3~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC3 by BC_data$Diagnosis
## F = 0.43579, num df = 356, denom df = 210, p-value = 5.047e-12
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3409085 0.5528362
## sample estimates:
## ratio of variances 
##          0.4357874
var.test(PC4~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC4 by BC_data$Diagnosis
## F = 1.0673, num df = 356, denom df = 210, p-value = 0.6053
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8349345 1.3539763
## sample estimates:
## ratio of variances 
##           1.067307
var.test(PC5~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC5 by BC_data$Diagnosis
## F = 1.7665, num df = 356, denom df = 210, p-value = 7.715e-06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  1.381905 2.240974
## sample estimates:
## ratio of variances 
##           1.766506
var.test(PC6~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC6 by BC_data$Diagnosis
## F = 0.5098, num df = 356, denom df = 210, p-value = 2.254e-08
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3988079 0.6467291
## sample estimates:
## ratio of variances 
##           0.509801
var.test(PC7~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC7 by BC_data$Diagnosis
## F = 0.83695, num df = 356, denom df = 210, p-value = 0.1427
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.6547291 1.0617453
## sample estimates:
## ratio of variances 
##          0.8369482
var.test(PC8~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC8 by BC_data$Diagnosis
## F = 8.0262, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   6.278731 10.181940
## sample estimates:
## ratio of variances 
##           8.026178
var.test(PC9~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC9 by BC_data$Diagnosis
## F = 0.38454, num df = 356, denom df = 210, p-value = 1.878e-15
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3008210 0.4878281
## sample estimates:
## ratio of variances 
##          0.3845431
var.test(PC10~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC10 by BC_data$Diagnosis
## F = 0.85729, num df = 356, denom df = 210, p-value = 0.2046
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.6706437 1.0875532
## sample estimates:
## ratio of variances 
##           0.857292
var.test(PC11~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC11 by BC_data$Diagnosis
## F = 0.63934, num df = 356, denom df = 210, p-value = 0.0002167
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.500140 0.811055
## sample estimates:
## ratio of variances 
##          0.6393351
var.test(PC12~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC12 by BC_data$Diagnosis
## F = 1.0906, num df = 356, denom df = 210, p-value = 0.4896
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8531713 1.3835502
## sample estimates:
## ratio of variances 
##           1.090619
var.test(PC13~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC13 by BC_data$Diagnosis
## F = 0.9691, num df = 356, denom df = 210, p-value = 0.7904
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7581115 1.2293959
## sample estimates:
## ratio of variances 
##          0.9691032
var.test(PC14~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC14 by BC_data$Diagnosis
## F = 1.0007, num df = 356, denom df = 210, p-value = 0.9963
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7828504 1.2695138
## sample estimates:
## ratio of variances 
##           1.000727
var.test(PC15~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC15 by BC_data$Diagnosis
## F = 0.36187, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2830879 0.4590710
## sample estimates:
## ratio of variances 
##          0.3618746
var.test(PC16~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC16 by BC_data$Diagnosis
## F = 0.84976, num df = 356, denom df = 210, p-value = 0.1799
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.664755 1.078004
## sample estimates:
## ratio of variances 
##          0.8497644
var.test(PC17~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC17 by BC_data$Diagnosis
## F = 0.57925, num df = 356, denom df = 210, p-value = 6.103e-06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4531336 0.7348267
## sample estimates:
## ratio of variances 
##          0.5792462
var.test(PC18~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC18 by BC_data$Diagnosis
## F = 0.74173, num df = 356, denom df = 210, p-value = 0.01376
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5802426 0.9409538
## sample estimates:
## ratio of variances 
##          0.7417312
var.test(PC19~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC19 by BC_data$Diagnosis
## F = 0.30542, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2389225 0.3874500
## sample estimates:
## ratio of variances 
##          0.3054175
var.test(PC20~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC20 by BC_data$Diagnosis
## F = 0.32225, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2520934 0.4088088
## sample estimates:
## ratio of variances 
##          0.3222541
var.test(PC21~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC21 by BC_data$Diagnosis
## F = 0.35985, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2815050 0.4565042
## sample estimates:
## ratio of variances 
##          0.3598512
var.test(PC22~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC22 by BC_data$Diagnosis
## F = 0.50418, num df = 356, denom df = 210, p-value = 1.315e-08
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3944098 0.6395969
## sample estimates:
## ratio of variances 
##          0.5041789
var.test(PC23~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC23 by BC_data$Diagnosis
## F = 0.68331, num df = 356, denom df = 210, p-value = 0.001663
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5345431 0.8668448
## sample estimates:
## ratio of variances 
##          0.6833129
var.test(PC24~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC24 by BC_data$Diagnosis
## F = 0.56554, num df = 356, denom df = 210, p-value = 2.324e-06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4424154 0.7174454
## sample estimates:
## ratio of variances 
##          0.5655449
var.test(PC25~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC25 by BC_data$Diagnosis
## F = 0.36848, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2882585 0.4674560
## sample estimates:
## ratio of variances 
##          0.3684843
var.test(PC26~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC26 by BC_data$Diagnosis
## F = 0.43709, num df = 356, denom df = 210, p-value = 6.017e-12
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3419287 0.5544906
## sample estimates:
## ratio of variances 
##          0.4370916
var.test(PC27~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC27 by BC_data$Diagnosis
## F = 0.37912, num df = 356, denom df = 210, p-value = 7.177e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2965811 0.4809525
## sample estimates:
## ratio of variances 
##          0.3791232
var.test(PC28~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC28 by BC_data$Diagnosis
## F = 0.37591, num df = 356, denom df = 210, p-value = 4.008e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2940709 0.4768817
## sample estimates:
## ratio of variances 
##          0.3759143
var.test(PC29~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC29 by BC_data$Diagnosis
## F = 0.40017, num df = 356, denom df = 210, p-value = 2.603e-14
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3130490 0.5076577
## sample estimates:
## ratio of variances 
##          0.4001744
var.test(PC30~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC30 by BC_data$Diagnosis
## F = 0.2101, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1643607 0.2665365
## sample estimates:
## ratio of variances 
##          0.2101043
var.test(PC31~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC31 by BC_data$Diagnosis
## F = 0.234, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1830544 0.2968512
## sample estimates:
## ratio of variances 
##          0.2340006
var.test(PC32~BC_data$Diagnosis,data=bcatyp_pca)
## 
##  F test to compare two variances
## 
## data:  PC32 by BC_data$Diagnosis
## F = 0.2811, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2198996 0.3566015
## sample estimates:
## ratio of variances 
##          0.2811004
# Better Ways to Visualize

library(factoextra)
library(FactoMineR)
library(ggfortify)
library(psych)
library(corrplot)
library(devtools)

# Correlation
pairs.panels(BC_data[,-1],
             gap = 0,
             bg = c("red", "blue")[BC_data$Diagnosis],
             pch=21)

pairs.panels(bca_pca$x,
             gap=0,
             bg = c("red", "blue")[BC_data$Diagnosis],
             pch=21)

fviz_eig(bca_pca, addlabels = TRUE)

fviz_pca_var(bca_pca,col.var = "cos2",
             gradient.cols = c("#FFCC00", "#CC9933", "#660033", "#330033"),
             repel = TRUE)

fviz_pca_ind(bca_pca, col.ind = "cos2", 
                  gradient.cols = c("#FFCC00", "#CC9933", "#660033", "#330033"), 
                  repel = TRUE)

biplot(bca_pca)

autoplot(bca_pca,
         data = BC_data[,-1],
         loadings = TRUE,
         labels = BC_data$Diagnosis)

# Different PCA Method. 
res.pca <- PCA(BC_data[,-2], graph = FALSE)
print(res.pca)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 568 individuals, described by 32 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
# Visualize and Interpret PCA using these functions 

#get_eigenvalue(res.pca): Extract the eigenvalues/variances of principal components
#fviz_eig(res.pca): Visualize the eigenvalues
#get_pca_ind(res.pca), get_pca_var(res.pca): Extract the results for individuals and variables, respectively.
#fviz_pca_ind(res.pca), fviz_pca_var(res.pca): Visualize the results individuals and variables, respectively.
#fviz_pca_biplot(res.pca): Make a biplot of individuals and variables.

eig.val <- get_eigenvalue(res.pca)
eig.val
##          eigenvalue variance.percent cumulative.variance.percent
## Dim.1  1.334011e+01     41.687850471                    41.68785
## Dim.2  5.740679e+00     17.939623292                    59.62747
## Dim.3  2.856671e+00      8.927098271                    68.55457
## Dim.4  1.990515e+00      6.220359683                    74.77493
## Dim.5  1.655248e+00      5.172648992                    79.94758
## Dim.6  1.240621e+00      3.876939831                    83.82452
## Dim.7  9.835331e-01      3.073540996                    86.89806
## Dim.8  8.581344e-01      2.681670143                    89.57973
## Dim.9  6.675166e-01      2.085989522                    91.66572
## Dim.10 4.617481e-01      1.442962783                    93.10868
## Dim.11 4.028135e-01      1.258792329                    94.36748
## Dim.12 3.494252e-01      1.091953839                    95.45943
## Dim.13 2.952150e-01      0.922546992                    96.38198
## Dim.14 2.606132e-01      0.814416258                    97.19639
## Dim.15 2.427493e-01      0.758591484                    97.95498
## Dim.16 1.558668e-01      0.487083715                    98.44207
## Dim.17 9.419889e-02      0.294371543                    98.73644
## Dim.18 7.918881e-02      0.247465027                    98.98391
## Dim.19 5.931593e-02      0.185362274                    99.16927
## Dim.20 5.203975e-02      0.162624226                    99.33189
## Dim.21 4.948143e-02      0.154629475                    99.48652
## Dim.22 3.106843e-02      0.097088831                    99.58361
## Dim.23 3.011234e-02      0.094101070                    99.67771
## Dim.24 2.756705e-02      0.086147045                    99.76386
## Dim.25 2.439053e-02      0.076220391                    99.84008
## Dim.26 1.816117e-02      0.056753656                    99.89683
## Dim.27 1.558456e-02      0.048701748                    99.94553
## Dim.28 8.140586e-03      0.025439333                    99.97097
## Dim.29 6.816197e-03      0.021300616                    99.99227
## Dim.30 1.596090e-03      0.004987780                    99.99726
## Dim.31 7.474333e-04      0.002335729                    99.99960
## Dim.32 1.288493e-04      0.000402654                   100.00000
fviz_eig(res.pca, addlabels = TRUE, ylim = c(0, 50))

var <- get_pca_var(res.pca)
#var$coord: coordinates of variables to create a scatter plot
#var$cos2: represents the quality of representation for variables on the factor map. It’s calculated as the squared coordinates: var.cos2 = var.coord * var.coord.
#var$contrib: contains the contributions (in percentage) of the variables to the principal components. 
#The contribution of a variable (var) to a given principal component is (in percentage) : (var.cos2 * 100) / (total cos2 of the component).
var
## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"
# Coordinates
head(var$coord)
##                      Dim.1      Dim.2       Dim.3       Dim.4        Dim.5
## ID              0.08531238 -0.0805740  0.16204864  0.03292313  0.013155817
## mean_radius     0.79636361 -0.5591083 -0.02209146 -0.06415462 -0.045516453
## mean_texture    0.39530442 -0.1352453  0.11116703  0.83803301  0.030946335
## mean_perimeter  0.82762692 -0.5148999 -0.02344368 -0.06531541 -0.045213275
## mean_area       0.80480447 -0.5524695  0.04177067 -0.07968590 -0.009409548
## mean_smoothness 0.51607144  0.4426651 -0.17509212 -0.21666598  0.481902877
# Cos2: quality on the factore map
head(var$cos2)
##                       Dim.1      Dim.2        Dim.3       Dim.4        Dim.5
## ID              0.007278202 0.00649217 0.0262597617 0.001083932 1.730755e-04
## mean_radius     0.634195001 0.31260214 0.0004880324 0.004115815 2.071747e-03
## mean_texture    0.156265581 0.01829128 0.0123581088 0.702299323 9.576756e-04
## mean_perimeter  0.684966323 0.26512195 0.0005496060 0.004266103 2.044240e-03
## mean_area       0.647710238 0.30522260 0.0017447886 0.006349843 8.853959e-05
## mean_smoothness 0.266329733 0.19595243 0.0306572498 0.046944147 2.322304e-01
# Contributions to the principal components
head(var$contrib)
##                      Dim.1     Dim.2      Dim.3       Dim.4        Dim.5
## ID              0.05455878 0.1130906 0.91924333  0.05445487  0.010456171
## mean_radius     4.75404550 5.4453857 0.01708395  0.20677135  0.125162385
## mean_texture    1.17139630 0.3186257 0.43260518 35.28229065  0.057856938
## mean_perimeter  5.13463691 4.6183027 0.01923938  0.21432157  0.123500565
## mean_area       4.85535826 5.3168375 0.06107768  0.31900502  0.005349023
## mean_smoothness 1.99645798 3.4134013 1.07318081  2.35839189 14.029947686
#The plot Below is also known as variable correlation plots. It shows the relationships between all variables. It can be interpreted as follow:

#Positively correlated variables are grouped together.
#Negatively correlated variables are positioned on opposite sides of the plot origin (opposed quadrants).
#The distance between variables and the origin measures the quality of the variables on the factor map. 
#Variables that are away from the origin are well represented on the factor map.

# Correlation circle
fviz_pca_var(res.pca, col.var = "black")

# Quality of representation


corrplot(var$cos2, is.corr=FALSE)

# Total cos2 of variables on Dim.1 and Dim.2
#A high cos2 indicates a good representation of the variable on the principal component. 
#In this case the variable is positioned close to the circumference of the correlation circle.
#A low cos2 indicates that the variable is not perfectly represented by the PCs. 
#In this case the variable is close to the center of the circle.

fviz_cos2(res.pca, choice = "var", axes = 1:2)

fviz_pca_var(res.pca, col.var = "cos2",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), 
             repel = TRUE # Avoid text overlapping
             )

# Change the transparency by cos2 values
fviz_pca_var(res.pca, alpha.var = "cos2")

corrplot(var$contrib, is.corr=FALSE)

# Contributions of variables to PC1
fviz_contrib(res.pca, choice = "var", axes = 1, top = 10)

# Contributions of variables to PC2
fviz_contrib(res.pca, choice = "var", axes = 2, top = 10)

fviz_pca_var(res.pca, col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
             )

fviz_pca_var(res.pca, alpha.var = "contrib")

fviz_pca_ind(res.pca,
             geom.ind = "point", # show points only (but not "text")
             col.ind = BC_data$Diagnosis, # color by groups
             palette = c("#00AFBB", "#E7B800", "#FC4E07"),
             addEllipses = TRUE, # Concentration ellipses
             legend.title = "Groups"
             )

fviz_pca_var(res.pca, axes.linetype = "blank")

ind.p <- fviz_pca_ind(res.pca, geom = "point", col.ind = BC_data$Diagnosis)
ggpubr::ggpar(ind.p,
              title = "Principal Component Analysis",
              subtitle = "Breast Cancer data set",
              caption = "Source: UCI",
              xlab = "PC1", ylab = "PC2",
              legend.title = "Diagnosis", legend.position = "top",
              ggtheme = theme_gray(), palette = "jco"
              )

fviz_pca_biplot(res.pca, repel = TRUE,col.ind = BC_data$Diagnosis,
                col.var = "#2E9FDF", # Variables color
                )

fviz_pca_biplot(res.pca, 
                col.ind = BC_data$Diagnosis, palette = "jco", 
                addEllipses = TRUE, label = "var",
                col.var = "black", repel = TRUE,
                legend.title = "Diagnosis") 

fviz_pca_biplot(res.pca, 
                # Fill individuals by groups
                geom.ind = "point",
                pointshape = 21,
                pointsize = 2.5,
                fill.ind = BC_data$Diagnosis,
                col.ind = "black",
                # Color variable by groups
                legend.title = list(fill = "Diagnosis", color = "Clusters"),
                repel = TRUE        # Avoid label overplotting
             )+
  ggpubr::fill_palette("jco")+      # Indiviual fill color
  ggpubr::color_palette("npg")      # Variable colors

fviz_pca_biplot(res.pca, 
                # Individuals
                geom.ind = "point",
                fill.ind = BC_data$Diagnosis, col.ind = "black",
                pointshape = 21, pointsize = 2,
                palette = "jco",
                addEllipses = TRUE,
                # Variables
                alpha.var ="contrib", col.var = "contrib",
                gradient.cols = "RdYlBu",
                
                legend.title = list(fill = "Diagnosis", color = "Contrib",
                                    alpha = "Contrib")
                )